diff options
author | Yan Pas <yanp.bugz@gmail.com> | 2018-10-07 18:10:01 +0300 |
---|---|---|
committer | Yan Pas <yanp.bugz@gmail.com> | 2018-10-07 18:10:01 +0300 |
commit | 27467189ab184c5d098e244e01f7d1bfdb0d4d45 (patch) | |
tree | d1fb96ebbc49ee0c4e73ef354feddd521690d545 /src/Text | |
parent | 4f3dd3b1af7217214287ab886147c5e33a54774d (diff) | |
parent | bd8a66394bc25b52dca9ffd963a560a4ca492f9c (diff) | |
download | pandoc-27467189ab184c5d098e244e01f7d1bfdb0d4d45.tar.gz |
Merge branch 'master' into groff_reader
Diffstat (limited to 'src/Text')
85 files changed, 3756 insertions, 2668 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index a59fd9bbe..79d83c0d3 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -52,7 +52,7 @@ import Data.Aeson (defaultOptions) import Data.Aeson.TH (deriveJSON) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B -import Data.Char (toLower, toUpper, isAscii, ord) +import Data.Char (toLower, toUpper) import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) @@ -62,8 +62,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TE import qualified Data.Text.Encoding.Error as TE -import Data.Yaml (decode) -import qualified Data.Yaml as Yaml +import qualified Data.YAML as YAML import GHC.Generics import Network.URI (URI (..), parseURI) #ifdef EMBED_DATA_FILES @@ -84,19 +83,18 @@ import System.Exit (exitSuccess) import System.FilePath import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) -import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Builder (setMeta, deleteMeta) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, ordNub, safeRead, tabFilter, uriPathToPath) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL) -import Text.Pandoc.XML (toEntities) import Text.Printf #ifndef _WINDOWS import System.Posix.IO (stdOutput) @@ -144,6 +142,13 @@ engines = map ("html",) htmlEngines ++ pdfEngines :: [String] pdfEngines = ordNub $ map snd engines +pdfIsNoWriterErrorMsg :: String +pdfIsNoWriterErrorMsg = + "To create a pdf using pandoc, use " ++ + "-t latex|beamer|context|ms|html5" ++ + "\nand specify an output file with " ++ + ".pdf extension (-o filename.pdf)." + pdfWriterAndProg :: Maybe String -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) @@ -155,9 +160,9 @@ pdfWriterAndProg mWriter mEngine = do where go Nothing Nothing = Right ("latex", "pdflatex") go (Just writer) Nothing = (writer,) <$> engineForWriter writer - go Nothing (Just engine) = (,engine) <$> writerForEngine engine + go Nothing (Just engine) = (,engine) <$> writerForEngine (takeBaseName engine) go (Just writer) (Just engine) = - case find (== (baseWriterName writer, engine)) engines of + case find (== (baseWriterName writer, takeBaseName engine)) engines of Just _ -> Right (writer, engine) Nothing -> Left $ "pdf-engine " ++ engine ++ " is not compatible with output format " ++ writer @@ -167,6 +172,7 @@ pdfWriterAndProg mWriter mEngine = do [] -> Left $ "pdf-engine " ++ eng ++ " not known" + engineForWriter "pdf" = Left pdfIsNoWriterErrorMsg engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of eng : _ -> Right eng [] -> Left $ @@ -235,11 +241,7 @@ convertWithOpts opts = do else case getWriter (map toLower writerName) of Left e -> E.throwIO $ PandocAppError $ if format == "pdf" - then e ++ - "\nTo create a pdf using pandoc, use " ++ - "-t latex|beamer|context|ms|html5" ++ - "\nand specify an output file with " ++ - ".pdf extension (-o filename.pdf)." + then e ++ "\n" ++ pdfIsNoWriterErrorMsg else e Right (w, es) -> return (w :: Writer PandocIO, es) @@ -381,11 +383,10 @@ convertWithOpts opts = do "" -> tp <.> format _ -> tp Just . UTF8.toString <$> - (readFileStrict tp' `catchError` + ((fst <$> fetchItem tp') `catchError` (\e -> case e of - PandocIOError _ e' | - isDoesNotExistError e' -> + PandocResourceNotFound _ -> readDataFile ("templates" </> tp') _ -> throwError e)) @@ -398,6 +399,10 @@ convertWithOpts opts = do ("application/xml", jatsCSL) return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + metadataFromFile <- + case optMetadataFile opts of + Nothing -> return mempty + Just file -> readFileLazy file >>= yamlToMeta case lookup "lang" (optMetadata opts) of Just l -> case parseBCP47 l of @@ -437,6 +442,7 @@ convertWithOpts opts = do , writerTOCDepth = optTOCDepth opts , writerReferenceDoc = optReferenceDoc opts , writerSyntaxMap = syntaxMap + , writerPreferAscii = optAscii opts } let readerOpts = def{ @@ -490,6 +496,7 @@ convertWithOpts opts = do ( (if isJust (optExtractMedia opts) then fillMediaBag else return) + >=> return . addNonPresentMetadata metadataFromFile >=> return . addMetadata metadata >=> applyTransforms transforms >=> applyFilters readerOpts filters' [format] @@ -512,19 +519,10 @@ convertWithOpts opts = do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"] - escape - | optAscii opts - , htmlFormat || format == "docbook4" || - format == "docbook5" || format == "docbook" || - format == "jats" || format == "opml" || - format == "icml" = toEntities - | optAscii opts - , format == "ms" || format == "man" = groffEscape - | otherwise = id addNl = if standalone then id else (<> T.singleton '\n') - output <- (addNl . escape) <$> f writerOptions doc + output <- addNl <$> f writerOptions doc writerFn eol outputFile =<< if optSelfContained opts && htmlFormat -- TODO not maximally efficient; change type @@ -532,12 +530,6 @@ convertWithOpts opts = do then T.pack <$> makeSelfContained (T.unpack output) else return output -groffEscape :: Text -> Text -groffEscape = T.concatMap toUchar - where toUchar c - | isAscii c = T.singleton c - | otherwise = T.pack $ printf "\\[u%04X]" (ord c) - type Transform = Pandoc -> Pandoc isTextFormat :: String -> Bool @@ -555,6 +547,7 @@ data Opt = Opt , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optMetadataFile :: Maybe FilePath -- ^ Name of YAML metadata file , optOutputFile :: Maybe FilePath -- ^ Name of output file , optInputFiles :: [FilePath] -- ^ Names of input files , optNumberSections :: Bool -- ^ Number sections in LaTeX @@ -598,7 +591,7 @@ data Opt = Opt , optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 - , optAscii :: Bool -- ^ Use ascii characters only in html + , optAscii :: Bool -- ^ Prefer ascii output , optDefaultImageExtension :: String -- ^ Default image extension , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. @@ -627,6 +620,7 @@ defaultOpts = Opt , optTemplate = Nothing , optVariables = [] , optMetadata = [] + , optMetadataFile = Nothing , optOutputFile = Nothing , optInputFiles = [] , optNumberSections = False @@ -686,6 +680,9 @@ defaultOpts = Opt , optStripComments = False } +addNonPresentMetadata :: Text.Pandoc.Meta -> Pandoc -> Pandoc +addNonPresentMetadata newmeta (Pandoc meta bs) = Pandoc (meta <> newmeta) bs + addMetadata :: [(String, String)] -> Pandoc -> Pandoc addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs @@ -702,10 +699,12 @@ removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs readMetaValue :: String -> MetaValue -readMetaValue s = case decode (UTF8.fromString s) of - Just (Yaml.String t) -> MetaString $ T.unpack t - Just (Yaml.Bool b) -> MetaBool b - _ -> MetaString s +readMetaValue s = case YAML.decodeStrict (UTF8.fromString s) of + Right [YAML.Scalar (YAML.SStr t)] + -> MetaString $ T.unpack t + Right [YAML.Scalar (YAML.SBool b)] + -> MetaBool b + _ -> MetaString s -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -960,6 +959,12 @@ options = "KEY[:VALUE]") "" + , Option "" ["metadata-file"] + (ReqArg + (\arg opt -> return opt{ optMetadataFile = Just arg }) + "FILE") + "" + , Option "V" ["variable"] (ReqArg (\arg opt -> do @@ -1153,7 +1158,7 @@ options = , Option "" ["ascii"] (NoArg (\opt -> return opt { optAscii = True })) - "" -- "Use ascii characters only in HTML output" + "" -- "Prefer ASCII output" , Option "" ["reference-links"] (NoArg diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 4ade2dc6d..e47546dfc 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -132,7 +132,7 @@ import Network.HTTP.Client.Internal (addProxy) import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType ) -import Network (withSocketsDo) +import Network.Socket (withSocketsDo) import Data.ByteString.Lazy (toChunks) import qualified Control.Exception as E import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) @@ -882,10 +882,10 @@ adjustImagePath _ _ x = x -- of things that would normally be obtained through IO. data PureState = PureState { stStdGen :: StdGen , stWord8Store :: [Word8] -- should be - -- inifinite, + -- infinite, -- i.e. [1..] , stUniqStore :: [Int] -- should be - -- inifinite and + -- infinite and -- contain every -- element at most -- once, e.g. [1..] diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs index 5cc965153..7d0af1a72 100644 --- a/src/Text/Pandoc/Emoji.hs +++ b/src/Text/Pandoc/Emoji.hs @@ -28,9 +28,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Emoji symbol lookup from canonical string identifier. -} -module Text.Pandoc.Emoji ( emojis ) where +module Text.Pandoc.Emoji ( emojis, emojiToInline ) where import Prelude import qualified Data.Map as M +import Text.Pandoc.Definition (Inline (Span, Str)) emojis :: M.Map String String emojis = M.fromList @@ -905,3 +906,7 @@ emojis = M.fromList ,("zero","0\65039\8419") ,("zzz","\128164") ] + +emojiToInline :: String -> Maybe Inline +emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis + where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 5ccb7dffb..b60c57497 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -101,7 +101,10 @@ data Extension = -- and disallow laziness | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between - -- East Asian wide characters + -- East Asian wide characters. Note: this extension + -- does not affect readers/writers directly; it causes + -- the eastAsianLineBreakFilter to be applied after + -- parsing, in Text.Pandoc.App.convertWithOpts. | Ext_emoji -- ^ Support emoji like :smile: | Ext_empty_paragraphs -- ^ Allow empty paragraphs | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML @@ -111,7 +114,7 @@ data Extension = | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks | Ext_fenced_code_blocks -- ^ Parse fenced code blocks | Ext_fenced_divs -- ^ Allow fenced div syntax ::: - | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_footnotes -- ^ Pandoc\/PHP\/MMD style footnotes | Ext_four_space_rule -- ^ Require 4-space indent for list contents | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using -- GitHub's method for generating identifiers diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 70bb70302..672eca392 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Highlighting ( highlightingStyles , tango , kate , monochrome + , breezeDark , haddock , Style , fromListingsLanguage diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index c5fe98a66..d57f66da5 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -319,20 +319,22 @@ pngSize img = do (shift w1 24 + shift w2 16 + shift w3 8 + w4, shift h1 24 + shift h2 16 + shift h3 8 + h4) _ -> Nothing -- "PNG parse error" - let (dpix, dpiy) = findpHYs rest'' + (dpix, dpiy) <- findpHYs rest'' return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } -findpHYs :: ByteString -> (Integer, Integer) +findpHYs :: ByteString -> Maybe (Integer, Integer) findpHYs x - | B.null x || "IDAT" `B.isPrefixOf` x = (72,72) + | B.null x || "IDAT" `B.isPrefixOf` x = return (72,72) | "pHYs" `B.isPrefixOf` x = - let [x1,x2,x3,x4,y1,y2,y3,y4,u] = - map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x - factor = if u == 1 -- dots per meter - then \z -> z * 254 `div` 10000 - else const 72 - in ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4, - factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 ) + case map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x of + [x1,x2,x3,x4,y1,y2,y3,y4,u] -> do + let factor = if u == 1 -- dots per meter + then \z -> z * 254 `div` 10000 + else const 72 + return + ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4, + factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 ) + _ -> mzero | otherwise = findpHYs $ B.drop 1 x -- read another byte gifSize :: ByteString -> Maybe ImageSize @@ -408,20 +410,21 @@ jpegSize img = jfifSize :: ByteString -> Either String ImageSize jfifSize rest = - let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral - $ unpack $ B.take 5 $B.drop 9 rest - factor = case dpiDensity of - 1 -> id - 2 -> \x -> x * 254 `div` 10 - _ -> const 72 - dpix = factor (shift dpix1 8 + dpix2) - dpiy = factor (shift dpiy1 8 + dpiy2) - in case findJfifSize rest of - Left msg -> Left msg - Right (w,h) ->Right ImageSize { pxX = w + case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of + [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] -> + let factor = case dpiDensity of + 1 -> id + 2 -> \x -> x * 254 `div` 10 + _ -> const 72 + dpix = factor (shift dpix1 8 + dpix2) + dpiy = factor (shift dpiy1 8 + dpiy2) + in case findJfifSize rest of + Left msg -> Left msg + Right (w,h) -> Right ImageSize { pxX = w , pxY = h , dpiX = dpix , dpiY = dpiy } + _ -> Left "unable to determine JFIF size" findJfifSize :: ByteString -> Either String (Integer,Integer) findJfifSize bs = @@ -541,10 +544,12 @@ exifHeader hdr = do let resfactor = case lookup ResolutionUnit allentries of Just (UnsignedShort 1) -> 100 / 254 _ -> 1 - let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) - $ lookup XResolution allentries - let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) - $ lookup YResolution allentries + let xres = case lookup XResolution allentries of + Just (UnsignedRational x) -> floor (x * resfactor) + _ -> 72 + let yres = case lookup YResolution allentries of + Just (UnsignedRational y) -> floor (y * resfactor) + _ -> 72 return ImageSize{ pxX = wdth , pxY = hght diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index cd7117074..e160f7123 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2017–2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017–2018 Albert Krewinkel @@ -34,14 +34,14 @@ module Text.Pandoc.Lua import Prelude import Control.Monad ((>=>)) -import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..), - Status (OK), ToLuaStack (push)) +import Foreign.Lua (Lua) import Text.Pandoc.Class (PandocIO) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Lua.Filter (LuaFilter, walkMWithLuaFilter) -import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath) -import Text.Pandoc.Lua.Util (popValue) +import Text.Pandoc.Lua.Init (LuaException (..), runPandocLua, registerScriptPath) +import Text.Pandoc.Lua.Util (dofileWithTraceback) import Text.Pandoc.Options (ReaderOptions) + import qualified Foreign.Lua as Lua -- | Run the Lua filter in @filterPath@ for a transformation to target @@ -59,26 +59,24 @@ runLuaFilter' ropts filterPath format pd = do registerReaderOptions registerScriptPath filterPath top <- Lua.gettop - stat <- Lua.dofile filterPath - if stat /= OK - then do - luaErrMsg <- popValue - Lua.throwLuaError luaErrMsg + stat <- dofileWithTraceback filterPath + if stat /= Lua.OK + then Lua.throwTopMessage else do newtop <- Lua.gettop -- Use the returned filters, or the implicitly defined global filter if -- nothing was returned. luaFilters <- if newtop - top >= 1 - then peek (-1) - else Lua.getglobal "_G" *> fmap (:[]) popValue + then Lua.peek Lua.stackTop + else Lua.pushglobaltable *> fmap (:[]) Lua.popValue runAll luaFilters pd where registerFormat = do - push format + Lua.push format Lua.setglobal "FORMAT" registerReaderOptions = do - push ropts + Lua.push ropts Lua.setglobal "PANDOC_READER_OPTIONS" runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 264066305..d17f9a969 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -1,6 +1,33 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu> + 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} {-# LANGUAGE FlexibleContexts #-} - +{-# LANGUAGE NoImplicitPrelude #-} +{- | +Module : Text.Pandoc.Lua.Filter +Copyright : © 2012–2018 John MacFarlane, + © 2017-2018 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Stability : alpha + +Types and functions for running Lua filters. +-} module Text.Pandoc.Lua.Filter ( LuaFilterFunction , LuaFilter , tryFilter @@ -12,62 +39,58 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction , inlineElementNames ) where import Prelude -import Control.Monad (mplus, unless, when, (>=>)) +import Control.Monad (mplus, (>=>)) import Control.Monad.Catch (finally) -import Text.Pandoc.Definition +import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, + showConstr, toConstr, tyconUQname) import Data.Foldable (foldrM) import Data.Map (Map) -import qualified Data.Map as Map -import qualified Foreign.Lua as Lua -import Foreign.Lua (FromLuaStack (peek), Lua, StackIndex, - Status (OK), ToLuaStack (push)) +import Foreign.Lua (Lua, Peekable, Pushable) +import Text.Pandoc.Definition +import Text.Pandoc.Lua.StackInstances () import Text.Pandoc.Walk (walkM, Walkable) -import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, - showConstr, toConstr, tyconUQname) -import Text.Pandoc.Lua.StackInstances() -import Text.Pandoc.Lua.Util (typeCheck) -type FunctionMap = Map String LuaFilterFunction - -newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } - -instance ToLuaStack LuaFilterFunction where - push = pushFilterFunction - -instance FromLuaStack LuaFilterFunction where - peek = registerFilterFunction - -newtype LuaFilter = LuaFilter FunctionMap - -instance FromLuaStack LuaFilter where - peek idx = - let constrs = metaFilterName : pandocFilterNames - ++ blockElementNames - ++ inlineElementNames - fn c acc = do - Lua.getfield idx c - filterFn <- Lua.tryLua (peek (-1)) - Lua.pop 1 +import qualified Data.Map.Strict as Map +import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.Lua.Util as LuaUtil + +-- | Filter function stored in the registry +newtype LuaFilterFunction = LuaFilterFunction Lua.Reference + +-- | Collection of filter functions (at most one function per element +-- constructor) +newtype LuaFilter = LuaFilter (Map String LuaFilterFunction) + +instance Peekable LuaFilter where + peek idx = do + let constrs = metaFilterName + : pandocFilterNames + ++ blockElementNames + ++ inlineElementNames + let go constr acc = do + Lua.getfield idx constr + filterFn <- registerFilterFunction return $ case filterFn of - Left _ -> acc - Right f -> (c, f) : acc - in LuaFilter . Map.fromList <$> foldrM fn [] constrs - --- | Push the filter function to the top of the stack. + Nothing -> acc + Just fn -> Map.insert constr fn acc + LuaFilter <$> foldrM go Map.empty constrs + +-- | Register the function at the top of the stack as a filter function in the +-- registry. +registerFilterFunction :: Lua (Maybe LuaFilterFunction) +registerFilterFunction = do + isFn <- Lua.isfunction Lua.stackTop + if isFn + then Just . LuaFilterFunction <$> Lua.ref Lua.registryindex + else Nothing <$ Lua.pop 1 + +-- | Retrieve filter function from registry and push it to the top of the stack. pushFilterFunction :: LuaFilterFunction -> Lua () -pushFilterFunction lf = - -- The function is stored in a lua registry table, retrieve it from there. - Lua.rawgeti Lua.registryindex (functionIndex lf) - -registerFilterFunction :: StackIndex -> Lua LuaFilterFunction -registerFilterFunction idx = do - isFn <- Lua.isfunction idx - unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx - Lua.pushvalue idx - refIdx <- Lua.ref Lua.registryindex - return $ LuaFilterFunction refIdx - -elementOrList :: FromLuaStack a => a -> Lua [a] +pushFilterFunction (LuaFilterFunction fnRef) = + Lua.getref Lua.registryindex fnRef + + +elementOrList :: Peekable a => a -> Lua [a] elementOrList x = do let topOfStack = Lua.stackTop elementUnchanged <- Lua.isnil topOfStack @@ -77,12 +100,10 @@ elementOrList x = do mbres <- Lua.peekEither topOfStack case mbres of Right res -> [res] <$ Lua.pop 1 - Left _ -> do - typeCheck Lua.stackTop Lua.TypeTable - Lua.toList topOfStack `finally` Lua.pop 1 + Left _ -> Lua.peekList topOfStack `finally` Lua.pop 1 -- | Try running a filter for the given element -tryFilter :: (Data a, FromLuaStack a, ToLuaStack a) +tryFilter :: (Data a, Peekable a, Pushable a) => LuaFilter -> a -> Lua [a] tryFilter (LuaFilter fnMap) x = let filterFnName = showConstr (toConstr x) @@ -96,14 +117,11 @@ tryFilter (LuaFilter fnMap) x = -- called with given element as argument and is expected to return an element. -- Alternatively, the function can return nothing or nil, in which case the -- element is left unchanged. -runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua () +runFilterFunction :: Pushable a => LuaFilterFunction -> a -> Lua () runFilterFunction lf x = do pushFilterFunction lf - push x - z <- Lua.pcall 1 1 Nothing - when (z /= OK) $ do - let addPrefix = ("Error while running filter function: " ++) - Lua.throwTopMessageAsError' addPrefix + Lua.push x + LuaUtil.callWithTraceback 1 1 walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc walkMWithLuaFilter f = @@ -156,7 +174,7 @@ metaFilterName = "Meta" pandocFilterNames :: [String] pandocFilterNames = ["Pandoc", "Doc"] -singleElement :: FromLuaStack a => a -> Lua a +singleElement :: Peekable a => a -> Lua a singleElement x = do elementUnchanged <- Lua.isnil (-1) if elementUnchanged @@ -167,6 +185,6 @@ singleElement x = do Right res -> res <$ Lua.pop 1 Left err -> do Lua.pop 1 - Lua.throwLuaError $ + Lua.throwException $ "Error while trying to get a filter's return " ++ "value from lua stack.\n" ++ err diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index c8c7fdfbd..35611d481 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017-2018 Albert Krewinkel @@ -40,7 +40,7 @@ import Control.Monad.Trans (MonadIO (..)) import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Data.IORef (newIORef, readIORef) import Data.Version (Version (versionBranch)) -import Foreign.Lua (Lua, LuaException (..)) +import Foreign.Lua (Lua) import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) import Paths_pandoc (version) import Text.Pandoc.Class (PandocIO, getCommonState, getUserDataDir, getMediaBag, @@ -54,17 +54,22 @@ import qualified Foreign.Lua as Lua import qualified Foreign.Lua.Module.Text as Lua import qualified Text.Pandoc.Definition as Pandoc +-- | Lua error message +newtype LuaException = LuaException String deriving (Show) + -- | Run the lua interpreter, using pandoc's default way of environment --- initalization. +-- initialization. runPandocLua :: Lua a -> PandocIO (Either LuaException a) runPandocLua luaOp = do luaPkgParams <- luaPackageParams enc <- liftIO $ getForeignEncoding <* setForeignEncoding utf8 - res <- liftIO $ Lua.runLuaEither (initLuaState luaPkgParams *> luaOp) + res <- liftIO $ Lua.runEither (initLuaState luaPkgParams *> luaOp) liftIO $ setForeignEncoding enc newMediaBag <- liftIO (readIORef (luaPkgMediaBag luaPkgParams)) setMediaBag newMediaBag - return res + return $ case res of + Left (Lua.Exception msg) -> Left (LuaException msg) + Right x -> Right x -- | Generate parameters required to setup pandoc's lua environment. luaPackageParams :: PandocIO LuaPackageParams diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index f48fe56c5..150c06cc8 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -87,7 +87,7 @@ mediaDirectoryFn mbRef = do zipWithM_ addEntry [1..] dirContents return 1 where - addEntry :: Int -> (FilePath, MimeType, Int) -> Lua () + addEntry :: Lua.Integer -> (FilePath, MimeType, Int) -> Lua () addEntry idx (fp, mimeType, contentLength) = do Lua.newtable Lua.push "path" *> Lua.push fp *> Lua.rawset (-3) diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 8cb630d7b..769b04b9e 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -17,6 +16,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc Copyright : Copyright © 2017-2018 Albert Krewinkel @@ -36,13 +36,12 @@ import Control.Monad (when) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) import Data.Text (pack) -import Foreign.Lua (ToLuaStack, FromLuaStack, Lua, NumResults, Optional, liftIO) +import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) import System.Exit (ExitCode (..)) import Text.Pandoc.Class (runIO) import Text.Pandoc.Definition (Block, Inline) import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Lua.Util (addFunction, addValue, loadScriptFromDataDir) import Text.Pandoc.Walk (Walkable) import Text.Pandoc.Options (ReaderOptions (readerExtensions)) import Text.Pandoc.Process (pipeProcess) @@ -51,19 +50,20 @@ import Text.Pandoc.Readers (Reader (..), getReader) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BSL import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.Lua.Util as LuaUtil -- | Push the "pandoc" on the lua stack. Requires the `list` module to be -- loaded. pushModule :: Maybe FilePath -> Lua NumResults pushModule datadir = do - loadScriptFromDataDir datadir "pandoc.lua" - addFunction "read" readDoc - addFunction "pipe" pipeFn - addFunction "walk_block" walkBlock - addFunction "walk_inline" walkInline + LuaUtil.loadScriptFromDataDir datadir "pandoc.lua" + LuaUtil.addFunction "read" readDoc + LuaUtil.addFunction "pipe" pipeFn + LuaUtil.addFunction "walk_block" walkBlock + LuaUtil.addFunction "walk_inline" walkInline return 1 -walkElement :: (ToLuaStack a, Walkable [Inline] a, Walkable [Block] a) +walkElement :: (Pushable a, Walkable [Inline] a, Walkable [Block] a) => a -> LuaFilter -> Lua a walkElement x f = walkInlines f x >>= walkBlocks f @@ -81,7 +81,8 @@ readDoc content formatSpecOrNil = do Right (reader, es) -> case reader of TextReader r -> do - res <- liftIO $ runIO $ r def{ readerExtensions = es } (pack content) + res <- Lua.liftIO . runIO $ + r def{ readerExtensions = es } (pack content) case res of Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc Left s -> Lua.raiseError (show s) -- error while reading @@ -93,7 +94,7 @@ pipeFn :: String -> BL.ByteString -> Lua NumResults pipeFn command args input = do - (ec, output) <- liftIO $ pipeProcess Nothing command args input + (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input case ec of ExitSuccess -> 1 <$ Lua.push output ExitFailure n -> Lua.raiseError (PipeError command n output) @@ -104,26 +105,26 @@ data PipeError = PipeError , pipeErrorOutput :: BL.ByteString } -instance FromLuaStack PipeError where +instance Peekable PipeError where peek idx = PipeError <$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1) <*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1) <*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1) -instance ToLuaStack PipeError where +instance Pushable PipeError where push pipeErr = do Lua.newtable - addValue "command" (pipeErrorCommand pipeErr) - addValue "error_code" (pipeErrorCode pipeErr) - addValue "output" (pipeErrorOutput pipeErr) + LuaUtil.addField "command" (pipeErrorCommand pipeErr) + LuaUtil.addField "error_code" (pipeErrorCode pipeErr) + LuaUtil.addField "output" (pipeErrorOutput pipeErr) pushPipeErrorMetaTable Lua.setmetatable (-2) where pushPipeErrorMetaTable :: Lua () pushPipeErrorMetaTable = do v <- Lua.newmetatable "pandoc pipe error" - when v $ addFunction "__tostring" pipeErrorMessage + when v $ LuaUtil.addFunction "__tostring" pipeErrorMessage pipeErrorMessage :: PipeError -> Lua BL.ByteString pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 7fa4616be..030d6af95 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Module.Utils Copyright : Copyright © 2017-2018 Albert Krewinkel @@ -33,15 +33,16 @@ module Text.Pandoc.Lua.Module.Utils import Prelude import Control.Applicative ((<|>)) import Data.Default (def) -import Foreign.Lua (FromLuaStack, Lua, LuaInteger, NumResults) +import Foreign.Lua (Peekable, Lua, NumResults) import Text.Pandoc.Class (runIO, setUserDataDir) import Text.Pandoc.Definition (Pandoc, Meta, MetaValue, Block, Inline) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Lua.Util (addFunction, popValue) +import Text.Pandoc.Lua.Util (addFunction) import qualified Data.Digest.Pure.SHA as SHA import qualified Data.ByteString.Lazy as BSL import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Filter.JSON as JSONFilter import qualified Text.Pandoc.Shared as Shared @@ -49,6 +50,7 @@ import qualified Text.Pandoc.Shared as Shared pushModule :: Maybe FilePath -> Lua NumResults pushModule mbDatadir = do Lua.newtable + addFunction "blocks_to_inlines" blocksToInlines addFunction "hierarchicalize" hierarchicalize addFunction "normalize_date" normalizeDate addFunction "run_json_filter" (runJSONFilter mbDatadir) @@ -57,6 +59,14 @@ pushModule mbDatadir = do addFunction "to_roman_numeral" toRomanNumeral return 1 +-- | Squashes a list of blocks into inlines. +blocksToInlines :: [Block] -> Lua.Optional [Inline] -> Lua [Inline] +blocksToInlines blks optSep = do + let sep = case Lua.fromOptional optSep of + Just x -> B.fromList x + Nothing -> Shared.defaultBlocksSeparator + return $ B.toList (Shared.blocksToInlinesWithSep sep blks) + -- | Convert list of Pandoc blocks into (hierarchical) list of Elements. hierarchicalize :: [Block] -> Lua [Shared.Element] hierarchicalize = return . Shared.hierarchicalize @@ -79,7 +89,7 @@ runJSONFilter mbDatadir doc filterFile optArgs = do Just x -> return x Nothing -> do Lua.getglobal "FORMAT" - (:[]) <$> popValue + (:[]) <$> Lua.popValue filterRes <- Lua.liftIO . runIO $ do setUserDataDir mbDatadir JSONFilter.apply def args filterFile doc @@ -111,18 +121,18 @@ data AstElement | MetaValueElement MetaValue deriving (Show) -instance FromLuaStack AstElement where +instance Peekable AstElement where peek idx = do - res <- Lua.tryLua $ (PandocElement <$> Lua.peek idx) - <|> (InlineElement <$> Lua.peek idx) - <|> (BlockElement <$> Lua.peek idx) - <|> (MetaElement <$> Lua.peek idx) - <|> (MetaValueElement <$> Lua.peek idx) + res <- Lua.try $ (PandocElement <$> Lua.peek idx) + <|> (InlineElement <$> Lua.peek idx) + <|> (BlockElement <$> Lua.peek idx) + <|> (MetaElement <$> Lua.peek idx) + <|> (MetaValueElement <$> Lua.peek idx) case res of Right x -> return x - Left _ -> Lua.throwLuaError + Left _ -> Lua.throwException "Expected an AST element, but could not parse value as such." -- | Convert a number < 4000 to uppercase roman numeral. -toRomanNumeral :: LuaInteger -> Lua String +toRomanNumeral :: Lua.Integer -> Lua String toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index 59637826e..5cf11f5c5 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -16,8 +15,9 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Packages Copyright : Copyright © 2017-2018 Albert Krewinkel @@ -35,12 +35,11 @@ module Text.Pandoc.Lua.Packages import Prelude import Control.Monad (forM_) -import Data.ByteString.Char8 (unpack) +import Data.ByteString (ByteString) import Data.IORef (IORef) import Foreign.Lua (Lua, NumResults, liftIO) import Text.Pandoc.Class (CommonState, readDataFile, runIO, setUserDataDir) import Text.Pandoc.MediaBag (MediaBag) -import Text.Pandoc.Lua.Util (dostring') import qualified Foreign.Lua as Lua import Text.Pandoc.Lua.Module.Pandoc as Pandoc @@ -57,14 +56,10 @@ data LuaPackageParams = LuaPackageParams -- | Insert pandoc's package loader as the first loader, making it the default. installPandocPackageSearcher :: LuaPackageParams -> Lua () installPandocPackageSearcher luaPkgParams = do - luaVersion <- Lua.getglobal "_VERSION" *> Lua.peek (-1) - if luaVersion == "Lua 5.1" - then Lua.getglobal' "package.loaders" - else Lua.getglobal' "package.searchers" + Lua.getglobal' "package.searchers" shiftArray Lua.pushHaskellFunction (pandocPackageSearcher luaPkgParams) - Lua.wrapHaskellFunction - Lua.rawseti (-2) 1 + Lua.rawseti (Lua.nthFromTop 2) 1 Lua.pop 1 -- remove 'package.searchers' from stack where shiftArray = forM_ [4, 3, 2, 1] $ \i -> do @@ -86,7 +81,6 @@ pandocPackageSearcher luaPkgParams pkgName = where pushWrappedHsFun f = do Lua.pushHaskellFunction f - Lua.wrapHaskellFunction return 1 searchPureLuaLoader = do let filename = pkgName ++ ".lua" @@ -97,21 +91,19 @@ pandocPackageSearcher luaPkgParams pkgName = Lua.push ("no file '" ++ filename ++ "' in pandoc's datadir") return 1 -loadStringAsPackage :: String -> String -> Lua NumResults +loadStringAsPackage :: String -> ByteString -> Lua NumResults loadStringAsPackage pkgName script = do - status <- dostring' script + status <- Lua.dostring script if status == Lua.OK then return (1 :: NumResults) else do - msg <- Lua.peek (-1) <* Lua.pop 1 - Lua.push ("Error while loading ``" ++ pkgName ++ "`.\n" ++ msg) - Lua.lerror - return (2 :: NumResults) + msg <- Lua.popValue + Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg) --- | Get the string representation of the pandoc module -dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe String) +-- | Get the ByteString representation of the pandoc module. +dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe ByteString) dataDirScript datadir moduleFile = do res <- runIO $ setUserDataDir datadir >> readDataFile moduleFile return $ case res of Left _ -> Nothing - Right s -> Just (unpack s) + Right s -> Just s diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs index 3298079c5..931b8c225 100644 --- a/src/Text/Pandoc/Lua/StackInstances.hs +++ b/src/Text/Pandoc/Lua/StackInstances.hs @@ -1,4 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE LambdaCase #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu> 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -17,10 +21,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.StackInstances Copyright : © 2012-2018 John MacFarlane @@ -37,148 +37,125 @@ module Text.Pandoc.Lua.StackInstances () where import Prelude import Control.Applicative ((<|>)) import Control.Monad (when) -import Control.Monad.Catch (finally) import Data.Data (showConstr, toConstr) -import Data.Foldable (forM_) -import Foreign.Lua (FromLuaStack (peek), Lua, LuaInteger, LuaNumber, StackIndex, - ToLuaStack (push), Type (..), throwLuaError, tryLua) +import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) import Text.Pandoc.Definition import Text.Pandoc.Extensions (Extensions) -import Text.Pandoc.Lua.Util (getTable, getTag, pushViaConstructor, typeCheck) +import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor) import Text.Pandoc.Options (ReaderOptions (..), TrackChanges) -import Text.Pandoc.Shared (Element (Blk, Sec), safeRead) +import Text.Pandoc.Shared (Element (Blk, Sec)) -import qualified Foreign.Lua as Lua import qualified Data.Set as Set +import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil -defineHowTo :: String -> Lua a -> Lua a -defineHowTo ctx op = op `Lua.modifyLuaError` (("Could not " ++ ctx ++ ": ") ++) - -instance ToLuaStack Pandoc where +instance Pushable Pandoc where push (Pandoc meta blocks) = pushViaConstructor "Pandoc" blocks meta -instance FromLuaStack Pandoc where +instance Peekable Pandoc where peek idx = defineHowTo "get Pandoc value" $ do - typeCheck idx Lua.TypeTable - blocks <- getTable idx "blocks" - meta <- Lua.getfield idx "meta" *> (Lua.peek Lua.stackTop `finally` Lua.pop 1) + blocks <- LuaUtil.rawField idx "blocks" + meta <- LuaUtil.rawField idx "meta" return $ Pandoc meta blocks -instance ToLuaStack Meta where +instance Pushable Meta where push (Meta mmap) = pushViaConstructor "Meta" mmap -instance FromLuaStack Meta where - peek idx = defineHowTo "get Meta value" $ do - typeCheck idx Lua.TypeTable - Meta <$> peek idx +instance Peekable Meta where + peek idx = defineHowTo "get Meta value" $ + Meta <$> Lua.peek idx -instance ToLuaStack MetaValue where +instance Pushable MetaValue where push = pushMetaValue -instance FromLuaStack MetaValue where +instance Peekable MetaValue where peek = peekMetaValue -instance ToLuaStack Block where +instance Pushable Block where push = pushBlock -instance FromLuaStack Block where +instance Peekable Block where peek = peekBlock -- Inline -instance ToLuaStack Inline where +instance Pushable Inline where push = pushInline -instance FromLuaStack Inline where +instance Peekable Inline where peek = peekInline -- Citation -instance ToLuaStack Citation where +instance Pushable Citation where push (Citation cid prefix suffix mode noteNum hash) = pushViaConstructor "Citation" cid mode prefix suffix noteNum hash -instance FromLuaStack Citation where +instance Peekable Citation where peek idx = do - id' <- getTable idx "id" - prefix <- getTable idx "prefix" - suffix <- getTable idx "suffix" - mode <- getTable idx "mode" - num <- getTable idx "note_num" - hash <- getTable idx "hash" + id' <- LuaUtil.rawField idx "id" + prefix <- LuaUtil.rawField idx "prefix" + suffix <- LuaUtil.rawField idx "suffix" + mode <- LuaUtil.rawField idx "mode" + num <- LuaUtil.rawField idx "note_num" + hash <- LuaUtil.rawField idx "hash" return $ Citation id' prefix suffix mode num hash -instance ToLuaStack Alignment where - push = push . show -instance FromLuaStack Alignment where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack CitationMode where - push = push . show -instance FromLuaStack CitationMode where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack Format where - push (Format f) = push f -instance FromLuaStack Format where - peek idx = Format <$> peek idx - -instance ToLuaStack ListNumberDelim where - push = push . show -instance FromLuaStack ListNumberDelim where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack ListNumberStyle where - push = push . show -instance FromLuaStack ListNumberStyle where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack MathType where - push = push . show -instance FromLuaStack MathType where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack QuoteType where - push = push . show -instance FromLuaStack QuoteType where - peek idx = safeRead' =<< peek idx - -instance ToLuaStack Double where - push = push . (realToFrac :: Double -> LuaNumber) -instance FromLuaStack Double where - peek = fmap (realToFrac :: LuaNumber -> Double) . peek - -instance ToLuaStack Int where - push = push . (fromIntegral :: Int -> LuaInteger) -instance FromLuaStack Int where - peek = fmap (fromIntegral :: LuaInteger-> Int) . peek - -safeRead' :: Read a => String -> Lua a -safeRead' s = case safeRead s of - Nothing -> throwLuaError ("Could not read: " ++ s) - Just x -> return x +instance Pushable Alignment where + push = Lua.push . show +instance Peekable Alignment where + peek = Lua.peekRead + +instance Pushable CitationMode where + push = Lua.push . show +instance Peekable CitationMode where + peek = Lua.peekRead + +instance Pushable Format where + push (Format f) = Lua.push f +instance Peekable Format where + peek idx = Format <$> Lua.peek idx + +instance Pushable ListNumberDelim where + push = Lua.push . show +instance Peekable ListNumberDelim where + peek = Lua.peekRead + +instance Pushable ListNumberStyle where + push = Lua.push . show +instance Peekable ListNumberStyle where + peek = Lua.peekRead + +instance Pushable MathType where + push = Lua.push . show +instance Peekable MathType where + peek = Lua.peekRead + +instance Pushable QuoteType where + push = Lua.push . show +instance Peekable QuoteType where + peek = Lua.peekRead -- | Push an meta value element to the top of the lua stack. pushMetaValue :: MetaValue -> Lua () pushMetaValue = \case MetaBlocks blcks -> pushViaConstructor "MetaBlocks" blcks - MetaBool bool -> push bool + MetaBool bool -> Lua.push bool MetaInlines inlns -> pushViaConstructor "MetaInlines" inlns MetaList metalist -> pushViaConstructor "MetaList" metalist MetaMap metamap -> pushViaConstructor "MetaMap" metamap - MetaString str -> push str + MetaString str -> Lua.push str -- | Interpret the value at the given stack index as meta value. peekMetaValue :: StackIndex -> Lua MetaValue peekMetaValue idx = defineHowTo "get MetaValue" $ do -- Get the contents of an AST element. - let elementContent :: FromLuaStack a => Lua a - elementContent = peek idx + let elementContent :: Peekable a => Lua a + elementContent = Lua.peek idx luatype <- Lua.ltype idx case luatype of - TypeBoolean -> MetaBool <$> peek idx - TypeString -> MetaString <$> peek idx - TypeTable -> do - tag <- tryLua $ getTag idx + Lua.TypeBoolean -> MetaBool <$> Lua.peek idx + Lua.TypeString -> MetaString <$> Lua.peek idx + Lua.TypeTable -> do + tag <- Lua.try $ LuaUtil.getTag idx case tag of Right "MetaBlocks" -> MetaBlocks <$> elementContent Right "MetaBool" -> MetaBool <$> elementContent @@ -186,16 +163,16 @@ peekMetaValue idx = defineHowTo "get MetaValue" $ do Right "MetaInlines" -> MetaInlines <$> elementContent Right "MetaList" -> MetaList <$> elementContent Right "MetaString" -> MetaString <$> elementContent - Right t -> throwLuaError ("Unknown meta tag: " ++ t) + Right t -> Lua.throwException ("Unknown meta tag: " <> t) Left _ -> do -- no meta value tag given, try to guess. len <- Lua.rawlen idx if len <= 0 - then MetaMap <$> peek idx - else (MetaInlines <$> peek idx) - <|> (MetaBlocks <$> peek idx) - <|> (MetaList <$> peek idx) - _ -> throwLuaError "could not get meta value" + then MetaMap <$> Lua.peek idx + else (MetaInlines <$> Lua.peek idx) + <|> (MetaBlocks <$> Lua.peek idx) + <|> (MetaList <$> Lua.peek idx) + _ -> Lua.throwException "could not get meta value" -- | Push an block element to the top of the lua stack. pushBlock :: Block -> Lua () @@ -219,8 +196,7 @@ pushBlock = \case -- | Return the value at the given index as block if possible. peekBlock :: StackIndex -> Lua Block peekBlock idx = defineHowTo "get Block value" $ do - typeCheck idx Lua.TypeTable - tag <- getTag idx + tag <- LuaUtil.getTag idx case tag of "BlockQuote" -> BlockQuote <$> elementContent "BulletList" -> BulletList <$> elementContent @@ -239,11 +215,11 @@ peekBlock idx = defineHowTo "get Block value" $ do "Table" -> (\(capt, aligns, widths, headers, body) -> Table capt aligns widths headers body) <$> elementContent - _ -> throwLuaError ("Unknown block type: " ++ tag) + _ -> Lua.throwException ("Unknown block type: " <> tag) where -- Get the contents of an AST element. - elementContent :: FromLuaStack a => Lua a - elementContent = getTable idx "c" + elementContent :: Peekable a => Lua a + elementContent = LuaUtil.rawField idx "c" -- | Push an inline element to the top of the lua stack. pushInline :: Inline -> Lua () @@ -271,8 +247,7 @@ pushInline = \case -- | Return the value at the given index as inline if possible. peekInline :: StackIndex -> Lua Inline peekInline idx = defineHowTo "get Inline value" $ do - typeCheck idx Lua.TypeTable - tag <- getTag idx + tag <- LuaUtil.getTag idx case tag of "Cite" -> uncurry Cite <$> elementContent "Code" -> withAttr Code <$> elementContent @@ -295,11 +270,11 @@ peekInline idx = defineHowTo "get Inline value" $ do "Strong" -> Strong <$> elementContent "Subscript" -> Subscript <$> elementContent "Superscript"-> Superscript <$> elementContent - _ -> throwLuaError ("Unknown inline type: " ++ tag) + _ -> Lua.throwException ("Unknown inline type: " <> tag) where -- Get the contents of an AST element. - elementContent :: FromLuaStack a => Lua a - elementContent = getTable idx "c" + elementContent :: Peekable a => Lua a + elementContent = LuaUtil.rawField idx "c" withAttr :: (Attr -> a -> b) -> (LuaAttr, a) -> b withAttr f (attributes, x) = f (fromLuaAttr attributes) x @@ -307,25 +282,25 @@ withAttr f (attributes, x) = f (fromLuaAttr attributes) x -- | Wrapper for Attr newtype LuaAttr = LuaAttr { fromLuaAttr :: Attr } -instance ToLuaStack LuaAttr where +instance Pushable LuaAttr where push (LuaAttr (id', classes, kv)) = pushViaConstructor "Attr" id' classes kv -instance FromLuaStack LuaAttr where - peek idx = defineHowTo "get Attr value" (LuaAttr <$> peek idx) +instance Peekable LuaAttr where + peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx) -- -- Hierarchical elements -- -instance ToLuaStack Element where - push (Blk blk) = push blk +instance Pushable Element where + push (Blk blk) = Lua.push blk push (Sec lvl num attr label contents) = do Lua.newtable - LuaUtil.addValue "level" lvl - LuaUtil.addValue "numbering" num - LuaUtil.addValue "attr" (LuaAttr attr) - LuaUtil.addValue "label" label - LuaUtil.addValue "contents" contents + LuaUtil.addField "level" lvl + LuaUtil.addField "numbering" num + LuaUtil.addField "attr" (LuaAttr attr) + LuaUtil.addField "label" label + LuaUtil.addField "contents" contents pushSecMetaTable Lua.setmetatable (-2) where @@ -333,7 +308,7 @@ instance ToLuaStack Element where pushSecMetaTable = do inexistant <- Lua.newmetatable "PandocElementSec" when inexistant $ do - LuaUtil.addValue "t" "Sec" + LuaUtil.addField "t" "Sec" Lua.push "__index" Lua.pushvalue (-2) Lua.rawset (-3) @@ -342,18 +317,13 @@ instance ToLuaStack Element where -- -- Reader Options -- -instance ToLuaStack Extensions where - push exts = push (show exts) - -instance ToLuaStack TrackChanges where - push = push . showConstr . toConstr +instance Pushable Extensions where + push exts = Lua.push (show exts) -instance ToLuaStack a => ToLuaStack (Set.Set a) where - push set = do - Lua.newtable - forM_ set (`LuaUtil.addValue` True) +instance Pushable TrackChanges where + push = Lua.push . showConstr . toConstr -instance ToLuaStack ReaderOptions where +instance Pushable ReaderOptions where push ro = do let ReaderOptions (extensions :: Extensions) @@ -367,12 +337,12 @@ instance ToLuaStack ReaderOptions where (stripComments :: Bool) = ro Lua.newtable - LuaUtil.addValue "extensions" extensions - LuaUtil.addValue "standalone" standalone - LuaUtil.addValue "columns" columns - LuaUtil.addValue "tabStop" tabStop - LuaUtil.addValue "indentedCodeClasses" indentedCodeClasses - LuaUtil.addValue "abbreviations" abbreviations - LuaUtil.addValue "defaultImageExtension" defaultImageExtension - LuaUtil.addValue "trackChanges" trackChanges - LuaUtil.addValue "stripComments" stripComments + LuaUtil.addField "extensions" extensions + LuaUtil.addField "standalone" standalone + LuaUtil.addField "columns" columns + LuaUtil.addField "tabStop" tabStop + LuaUtil.addField "indentedCodeClasses" indentedCodeClasses + LuaUtil.addField "abbreviations" abbreviations + LuaUtil.addField "defaultImageExtension" defaultImageExtension + LuaUtil.addField "trackChanges" trackChanges + LuaUtil.addField "stripComments" stripComments diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs index ea9ec2554..77b27b88e 100644 --- a/src/Text/Pandoc/Lua/Util.hs +++ b/src/Text/Pandoc/Lua/Util.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu> 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -18,6 +17,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Util Copyright : © 2012–2018 John MacFarlane, @@ -31,101 +32,53 @@ Lua utility functions. -} module Text.Pandoc.Lua.Util ( getTag - , getTable - , addValue + , rawField + , addField , addFunction - , getRawInt - , setRawInt - , addRawInt - , typeCheck - , raiseError - , popValue - , PushViaCall - , pushViaCall + , addValue , pushViaConstructor , loadScriptFromDataDir - , dostring' + , defineHowTo + , throwTopMessageAsError' + , callWithTraceback + , dofileWithTraceback ) where import Prelude -import Control.Monad (when) -import Control.Monad.Catch (finally) -import Data.ByteString.Char8 (unpack) -import Foreign.Lua (FromLuaStack (..), NumResults, Lua, NumArgs, StackIndex, - ToLuaStack (..), ToHaskellFunction) -import Foreign.Lua.Api (Status, call, pop, rawget, rawgeti, rawset, rawseti) +import Control.Monad (unless, when) +import Foreign.Lua ( Lua, NumArgs, NumResults, Peekable, Pushable, StackIndex + , Status, ToHaskellFunction ) import Text.Pandoc.Class (readDataFile, runIOorExplode, setUserDataDir) import qualified Foreign.Lua as Lua - --- | Adjust the stack index, assuming that @n@ new elements have been pushed on --- the stack. -adjustIndexBy :: StackIndex -> StackIndex -> StackIndex -adjustIndexBy idx n = - if idx < 0 - then idx - n - else idx +import qualified Text.Pandoc.UTF8 as UTF8 -- | Get value behind key from table at given index. -getTable :: (ToLuaStack a, FromLuaStack b) => StackIndex -> a -> Lua b -getTable idx key = do - push key - rawget (idx `adjustIndexBy` 1) - popValue +rawField :: Peekable a => StackIndex -> String -> Lua a +rawField idx key = do + absidx <- Lua.absindex idx + Lua.push key + Lua.rawget absidx + Lua.popValue + +-- | Add a value to the table at the top of the stack at a string-index. +addField :: Pushable a => String -> a -> Lua () +addField = addValue -- | Add a key-value pair to the table at the top of the stack. -addValue :: (ToLuaStack a, ToLuaStack b) => a -> b -> Lua () +addValue :: (Pushable a, Pushable b) => a -> b -> Lua () addValue key value = do - push key - push value - rawset (-3) + Lua.push key + Lua.push value + Lua.rawset (Lua.nthFromTop 3) -- | Add a function to the table at the top of the stack, using the given name. addFunction :: ToHaskellFunction a => String -> a -> Lua () addFunction name fn = do Lua.push name Lua.pushHaskellFunction fn - Lua.wrapHaskellFunction Lua.rawset (-3) --- | Get value behind key from table at given index. -getRawInt :: FromLuaStack a => StackIndex -> Int -> Lua a -getRawInt idx key = do - rawgeti idx key - popValue - --- | Set numeric key/value in table at the given index -setRawInt :: ToLuaStack a => StackIndex -> Int -> a -> Lua () -setRawInt idx key value = do - push value - rawseti (idx `adjustIndexBy` 1) key - --- | Set numeric key/value in table at the top of the stack. -addRawInt :: ToLuaStack a => Int -> a -> Lua () -addRawInt = setRawInt (-1) - -typeCheck :: StackIndex -> Lua.Type -> Lua () -typeCheck idx expected = do - actual <- Lua.ltype idx - when (actual /= expected) $ do - expName <- Lua.typename expected - actName <- Lua.typename actual - Lua.throwLuaError $ "expected " ++ expName ++ " but got " ++ actName ++ "." - -raiseError :: ToLuaStack a => a -> Lua NumResults -raiseError e = do - Lua.push e - fromIntegral <$> Lua.lerror - --- | Get, then pop the value at the top of the stack. -popValue :: FromLuaStack a => Lua a -popValue = do - resOrError <- Lua.peekEither (-1) - pop 1 - case resOrError of - Left err -> Lua.throwLuaError err - Right x -> return x - -- | Helper class for pushing a single value to the stack via a lua function. -- See @pushViaCall@. class PushViaCall a where @@ -136,11 +89,11 @@ instance PushViaCall (Lua ()) where Lua.push fn Lua.rawget Lua.registryindex pushArgs - call num 1 + Lua.call num 1 -instance (ToLuaStack a, PushViaCall b) => PushViaCall (a -> b) where +instance (Pushable a, PushViaCall b) => PushViaCall (a -> b) where pushViaCall' fn pushArgs num x = - pushViaCall' fn (pushArgs *> push x) (num + 1) + pushViaCall' fn (pushArgs *> Lua.push x) (num + 1) -- | Push an value to the stack via a lua function. The lua function is called -- with all arguments that are passed to this function and is expected to return @@ -155,26 +108,11 @@ pushViaConstructor pandocFn = pushViaCall ("pandoc." ++ pandocFn) -- | Load a file from pandoc's data directory. loadScriptFromDataDir :: Maybe FilePath -> FilePath -> Lua () loadScriptFromDataDir datadir scriptFile = do - script <- fmap unpack . Lua.liftIO . runIOorExplode $ + script <- Lua.liftIO . runIOorExplode $ setUserDataDir datadir >> readDataFile scriptFile - status <- dostring' script - when (status /= Lua.OK) . - Lua.throwTopMessageAsError' $ \msg -> - "Couldn't load '" ++ scriptFile ++ "'.\n" ++ msg - --- | Load a string and immediately perform a full garbage collection. This is --- important to keep the program from hanging: If the program contained a call --- to @require@, the a new loader function was created which then become --- garbage. If that function is collected at an inopportune times, i.e. when the --- Lua API is called via a function that doesn't allow calling back into Haskell --- (getraw, setraw, …), then the function's finalizer, and the full program, --- will hang. -dostring' :: String -> Lua Status -dostring' script = do - loadRes <- Lua.loadstring script - if loadRes == Lua.OK - then Lua.pcall 0 1 Nothing <* Lua.gc Lua.GCCOLLECT 0 - else return loadRes + status <- Lua.dostring script + when (status /= Lua.OK) $ + throwTopMessageAsError' (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) -- | Get the tag of a value. This is an optimized and specialized version of -- @Lua.getfield idx "tag"@. It only checks for the field on the table at index @@ -182,8 +120,54 @@ dostring' script = do -- metatable. getTag :: StackIndex -> Lua String getTag idx = do - top <- Lua.gettop - hasMT <- Lua.getmetatable idx - push "tag" - if hasMT then Lua.rawget (-2) else Lua.rawget (idx `adjustIndexBy` 1) - peek Lua.stackTop `finally` Lua.settop top + -- push metatable or just the table + Lua.getmetatable idx >>= \hasMT -> unless hasMT (Lua.pushvalue idx) + Lua.push "tag" + Lua.rawget (Lua.nthFromTop 2) + Lua.tostring Lua.stackTop <* Lua.pop 2 >>= \case + Nothing -> Lua.throwException "untagged value" + Just x -> return (UTF8.toString x) + +-- | Modify the message at the top of the stack before throwing it as an +-- Exception. +throwTopMessageAsError' :: (String -> String) -> Lua a +throwTopMessageAsError' modifier = do + msg <- Lua.tostring' Lua.stackTop + Lua.pop 2 -- remove error and error string pushed by tostring' + Lua.throwException (modifier (UTF8.toString msg)) + +-- | Mark the context of a Lua computation for better error reporting. +defineHowTo :: String -> Lua a -> Lua a +defineHowTo ctx = Lua.withExceptionMessage (("Could not " <> ctx <> ": ") <>) + +-- | Like @'Lua.pcall'@, but uses a predefined error handler which adds a +-- traceback on error. +pcallWithTraceback :: NumArgs -> NumResults -> Lua Status +pcallWithTraceback nargs nresults = do + let traceback' :: Lua NumResults + traceback' = do + l <- Lua.state + msg <- Lua.tostring' (Lua.nthFromBottom 1) + Lua.traceback l (Just (UTF8.toString msg)) 2 + return 1 + tracebackIdx <- Lua.absindex (Lua.nthFromTop (Lua.fromNumArgs nargs + 1)) + Lua.pushHaskellFunction traceback' + Lua.insert tracebackIdx + result <- Lua.pcall nargs nresults (Just tracebackIdx) + Lua.remove tracebackIdx + return result + +-- | Like @'Lua.call'@, but adds a traceback to the error message (if any). +callWithTraceback :: NumArgs -> NumResults -> Lua () +callWithTraceback nargs nresults = do + result <- pcallWithTraceback nargs nresults + when (result /= Lua.OK) Lua.throwTopMessage + +-- | Run the given string as a Lua program, while also adding a traceback to the +-- error message if an error occurs. +dofileWithTraceback :: FilePath -> Lua Status +dofileWithTraceback fp = do + loadRes <- Lua.loadfile fp + case loadRes of + Lua.OK -> pcallWithTraceback 0 Lua.multret + _ -> return loadRes diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index e5ca1764c..204060d70 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -194,6 +194,7 @@ data WriterOptions = WriterOptions , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown , writerSyntaxMap :: SyntaxMap + , writerPreferAscii :: Bool -- ^ Prefer ASCII representations of characters when possible } deriving (Show, Data, Typeable, Generic) instance Default WriterOptions where @@ -228,6 +229,7 @@ instance Default WriterOptions where , writerReferenceDoc = Nothing , writerReferenceLocation = EndOfDocument , writerSyntaxMap = defaultSyntaxMap + , writerPreferAscii = False } instance HasSyntaxExtensions WriterOptions where diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index b171d65b0..3484699c0 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -79,13 +79,52 @@ changePathSeparators = intercalate "/" . splitDirectories #endif makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex, - -- wkhtmltopdf, weasyprint, prince, context, pdfroff) + -- wkhtmltopdf, weasyprint, prince, context, pdfroff, + -- or path to executable) -> [String] -- ^ arguments to pass to pdf creator -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document -> PandocIO (Either ByteString ByteString) -makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do +makePDF program pdfargs writer opts doc = do + case takeBaseName program of + "wkhtmltopdf" -> makeWithWkhtmltopdf program pdfargs writer opts doc + prog | prog `elem` ["weasyprint", "prince"] -> do + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity program pdfargs source + "pdfroff" -> do + source <- writer opts doc + let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", + "--no-toc-relocation"] ++ pdfargs + verbosity <- getVerbosity + liftIO $ ms2pdf verbosity program args source + baseProg -> do + -- With context and latex, we create a temp directory within + -- the working directory, since pdflatex sometimes tries to + -- use tools like epstopdf.pl, which are restricted if run + -- on files outside the working directory. + let withTemp = withTempDirectory "." + commonState <- getCommonState + verbosity <- getVerbosity + liftIO $ withTemp "tex2pdf." $ \tmpdir -> do + source <- runIOorExplode $ do + putCommonState commonState + doc' <- handleImages tmpdir doc + writer opts doc' + case baseProg of + "context" -> context2pdf verbosity program tmpdir source + prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] + -> tex2pdf verbosity program pdfargs tmpdir source + _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program + +makeWithWkhtmltopdf :: String -- ^ wkhtmltopdf or path + -> [String] -- ^ arguments + -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer + -> WriterOptions -- ^ options + -> Pandoc -- ^ document + -> PandocIO (Either ByteString ByteString) +makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", @@ -111,39 +150,7 @@ makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do ] source <- writer opts doc verbosity <- getVerbosity - liftIO $ html2pdf verbosity "wkhtmltopdf" args source -makePDF "weasyprint" pdfargs writer opts doc = do - source <- writer opts doc - verbosity <- getVerbosity - liftIO $ html2pdf verbosity "weasyprint" pdfargs source -makePDF "prince" pdfargs writer opts doc = do - source <- writer opts doc - verbosity <- getVerbosity - liftIO $ html2pdf verbosity "prince" pdfargs source -makePDF "pdfroff" pdfargs writer opts doc = do - source <- writer opts doc - let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", - "--no-toc-relocation"] ++ pdfargs - verbosity <- getVerbosity - liftIO $ ms2pdf verbosity args source -makePDF program pdfargs writer opts doc = do - -- With context and latex, we create a temp directory within - -- the working directory, since pdflatex sometimes tries to - -- use tools like epstopdf.pl, which are restricted if run - -- on files outside the working directory. - let withTemp = withTempDirectory "." - commonState <- getCommonState - verbosity <- getVerbosity - liftIO $ withTemp "tex2pdf." $ \tmpdir -> do - source <- runIOorExplode $ do - putCommonState commonState - doc' <- handleImages tmpdir doc - writer opts doc' - case takeBaseName program of - "context" -> context2pdf verbosity tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] - -> tex2pdf' verbosity pdfargs tmpdir program source - _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program + liftIO $ html2pdf verbosity program args source handleImages :: FilePath -- ^ temp dir to store images -> Pandoc -- ^ document @@ -181,7 +188,7 @@ convertImage tmpdir fname = then return $ Right pdfOut else return $ Left "conversion from SVG failed") (\(e :: E.SomeException) -> return $ Left $ - "check that rsvg2pdf is in path.\n" ++ + "check that rsvg-convert is in path.\n" ++ show e) _ -> JP.readImage fname >>= \res -> case res of @@ -195,13 +202,13 @@ convertImage tmpdir fname = mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: Verbosity -- ^ Verbosity level - -> [String] -- ^ Arguments to the latex-engine - -> FilePath -- ^ temp directory for output - -> String -- ^ tex program - -> Text -- ^ tex source - -> IO (Either ByteString ByteString) -tex2pdf' verbosity args tmpDir program source = do +tex2pdf :: Verbosity -- ^ Verbosity level + -> String -- ^ tex program + -> [String] -- ^ Arguments to the latex-engine + -> FilePath -- ^ temp directory for output + -> Text -- ^ tex source + -> IO (Either ByteString ByteString) +tex2pdf verbosity program args tmpDir source = do let numruns = if "\\tableofcontents" `T.isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks @@ -278,12 +285,7 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir'] ++ - -- see #4484, only compress images on last run: - if program == "xelatex" && runNumber < numRuns - then ["-output-driver", "xdvipdfmx -z0"] - else [] - ++ args ++ [file'] + "-output-directory", tmpDir'] ++ args ++ [file'] env' <- getEnvironment let sep = [searchPathSeparator] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) @@ -307,7 +309,7 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do putStrLn $ "[makePDF] Run #" ++ show runNumber BL.hPutStr stdout out putStr "\n" - if runNumber <= numRuns + if runNumber < numRuns then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir @@ -328,14 +330,15 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do return (exit, log', pdf) ms2pdf :: Verbosity + -> String -> [String] -> Text -> IO (Either ByteString ByteString) -ms2pdf verbosity args source = do +ms2pdf verbosity program args source = do env' <- getEnvironment when (verbosity >= INFO) $ do putStrLn "[makePDF] Command line:" - putStrLn $ "pdfroff " ++ " " ++ unwords (map show args) + putStrLn $ program ++ " " ++ unwords (map show args) putStr "\n" putStrLn "[makePDF] Environment:" mapM_ print env' @@ -344,11 +347,11 @@ ms2pdf verbosity args source = do putStr $ T.unpack source putStr "\n" (exit, out) <- E.catch - (pipeProcess (Just env') "pdfroff" args + (pipeProcess (Just env') program args (BL.fromStrict $ UTF8.fromText source)) (\(e :: IOError) -> if isDoesNotExistError e then E.throwIO $ - PandocPDFProgramNotFoundError "pdfroff" + PandocPDFProgramNotFoundError program else E.throwIO e) when (verbosity >= INFO) $ do BL.hPutStr stdout out @@ -358,7 +361,7 @@ ms2pdf verbosity args source = do ExitSuccess -> Right out html2pdf :: Verbosity -- ^ Verbosity level - -> String -- ^ Program (wkhtmltopdf, weasyprint or prince) + -> String -- ^ Program (wkhtmltopdf, weasyprint, prince, or path) -> [String] -- ^ Args to program -> Text -- ^ HTML5 source -> IO (Either ByteString ByteString) @@ -369,7 +372,7 @@ html2pdf verbosity program args source = do file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp BS.writeFile file $ UTF8.fromText source - let pdfFileArgName = ["-o" | program == "prince"] + let pdfFileArgName = ["-o" | takeBaseName program == "prince"] let programArgs = args ++ [file] ++ pdfFileArgName ++ [pdfFile] env' <- getEnvironment when (verbosity >= INFO) $ do @@ -408,10 +411,11 @@ html2pdf verbosity program args source = do (ExitSuccess, Just pdf) -> Right pdf context2pdf :: Verbosity -- ^ Verbosity level + -> String -- ^ "context" or path to it -> FilePath -- ^ temp directory for output -> Text -- ^ ConTeXt source -> IO (Either ByteString ByteString) -context2pdf verbosity tmpDir source = inDirectory tmpDir $ do +context2pdf verbosity program tmpDir source = inDirectory tmpDir $ do let file = "input.tex" BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS @@ -426,7 +430,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" - putStrLn $ "context" ++ " " ++ unwords (map show programArgs) + putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" mapM_ print env' @@ -435,7 +439,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do BL.readFile file >>= BL.putStr putStr "\n" (exit, out) <- E.catch - (pipeProcess (Just env') "context" programArgs BL.empty) + (pipeProcess (Just env') program programArgs BL.empty) (\(e :: IOError) -> if isDoesNotExistError e then E.throwIO $ PandocPDFProgramNotFoundError "context" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 05f4f7d36..5d95d0e27 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -514,22 +514,19 @@ charsInBalanced open close parser = try $ do -- Auxiliary functions for romanNumeral: -lowercaseRomanDigits :: [Char] -lowercaseRomanDigits = ['i','v','x','l','c','d','m'] - -uppercaseRomanDigits :: [Char] -uppercaseRomanDigits = map toUpper lowercaseRomanDigits - -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true -> ParserT s st m Int romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits - else lowercaseRomanDigits - lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = - map char romanDigits + let rchar uc = char $ if upperCase then uc else toLower uc + let one = rchar 'I' + let five = rchar 'V' + let ten = rchar 'X' + let fifty = rchar 'L' + let hundred = rchar 'C' + let fivehundred = rchar 'D' + let thousand = rchar 'M' + lookAhead $ choice [one, five, ten, fifty, hundred, fivehundred, thousand] thousands <- ((1000 *) . length) <$> many thousand ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 fivehundreds <- option 0 $ 500 <$ fivehundred @@ -1289,7 +1286,7 @@ type SubstTable = M.Map Key Inlines -- unique identifier, and update the list of identifiers -- in state. Issue a warning if an explicit identifier -- is encountered that duplicates an earlier identifier --- (explict or automatically generated). +-- (explicit or automatically generated). registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasLogMessages st, HasIdentifierList st) => Attr -> Inlines -> ParserT s st m Attr diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 79a4abbc2..9c4f7a8ac 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -39,10 +39,12 @@ import Control.Monad.State import Data.Char (isAlphaNum, isLetter, isSpace, toLower) import Data.List (groupBy) import qualified Data.Map as Map +import Data.Maybe (mapMaybe) import Data.Text (Text, unpack) +import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Options import Text.Pandoc.Shared (stringify) import Text.Pandoc.Walk (walkM) @@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM) readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ (if isEnabled Ext_gfm_auto_identifiers opts - then addHeaderIdentifiers + then addHeaderIdentifiers opts else id) $ nodeToPandoc opts $ commonmarkToNode opts' exts s where opts' = [ optSmart | isEnabled Ext_smart opts ] @@ -59,24 +61,27 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> String -convertEmojis (':':xs) = +convertEmojis :: String -> [Inline] +convertEmojis s@(':':xs) = case break (==':') xs of (ys,':':zs) -> - case Map.lookup ys emojis of - Just s -> s ++ convertEmojis zs - Nothing -> ':' : ys ++ convertEmojis (':':zs) - _ -> ':':xs -convertEmojis (x:xs) = x : convertEmojis xs -convertEmojis [] = [] - -addHeaderIdentifiers :: Pandoc -> Pandoc -addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty - -addHeaderId :: Block -> State (Map.Map String Int) Block -addHeaderId (Header lev (_,classes,kvs) ils) = do + case emojiToInline ys of + Just em -> em : convertEmojis zs + Nothing -> Str (':' : ys) : convertEmojis (':':zs) + _ -> [Str s] +convertEmojis s = + case break (==':') s of + ("","") -> [] + (_,"") -> [Str s] + (xs,ys) -> Str xs:convertEmojis ys + +addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc +addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty + +addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block +addHeaderId opts (Header lev (_,classes,kvs) ils) = do idmap <- get - let ident = toIdent ils + let ident = toIdent opts ils ident' <- case Map.lookup ident idmap of Nothing -> do put (Map.insert ident 1 idmap) @@ -85,13 +90,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do put (Map.adjust (+ 1) ident idmap) return (ident ++ "-" ++ show i) return $ Header lev (ident',classes,kvs) ils -addHeaderId x = return x +addHeaderId _ x = return x -toIdent :: [Inline] -> String -toIdent = map (\c -> if isSpace c then '-' else c) - . filter (\c -> isLetter c || isAlphaNum c || isSpace c || - c == '_' || c == '-') - . map toLower . stringify +toIdent :: ReaderOptions -> [Inline] -> String +toIdent opts = map (\c -> if isSpace c then '-' else c) + . filterer + . map toLower . stringify + where filterer = if isEnabled Ext_ascii_identifiers opts + then mapMaybe toAsciiChar + else filter (\c -> isLetter c || isAlphaNum c || isSpace c || + c == '_' || c == '-') nodeToPandoc :: ReaderOptions -> Node -> Pandoc nodeToPandoc opts (Node _ DOCUMENT nodes) = @@ -200,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline] addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] -addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++) +addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) where raw = unpack t clumps = groupBy samekind raw samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = Space - toinl xs = Str $ if isEnabled Ext_emoji opts - then convertEmojis xs - else xs + toinl (' ':_) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index 4fd38c0fd..a337bf937 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -2,7 +2,7 @@ {- Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de> - partly based on all the other readers, especialy the work by + partly based on all the other readers, especially the work by John MacFarlane <jgm@berkeley.edu> and Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> all bugs are solely created by me. diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3d48c7ee8..b7bd71754 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -537,7 +537,6 @@ type DB m = StateT DBState m data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType , dbMeta :: Meta - , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines , dbContent :: [Content] @@ -547,7 +546,6 @@ instance Default DBState where def = DBState{ dbSectionLevel = 0 , dbQuoteType = DoubleQuote , dbMeta = mempty - , dbAcceptsMeta = False , dbBook = False , dbFigureTitle = mempty , dbContent = [] } @@ -609,18 +607,26 @@ named s e = qName (elName e) == s -- -acceptingMetadata :: PandocMonad m => DB m a -> DB m a -acceptingMetadata p = do - modify (\s -> s { dbAcceptsMeta = True } ) - res <- p - modify (\s -> s { dbAcceptsMeta = False }) - return res - -checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a -checkInMeta p = do - accepts <- dbAcceptsMeta <$> get - when accepts p - return mempty +addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks +addMetadataFromElement e = do + case filterChild (named "title") e of + Nothing -> return () + Just z -> do + getInlines z >>= addMeta "title" + addMetaField "subtitle" z + case filterChild (named "authorgroup") e of + Nothing -> return () + Just z -> addMetaField "author" z + addMetaField "subtitle" e + addMetaField "author" e + addMetaField "date" e + addMetaField "release" e + return mempty + where addMetaField fieldname elt = + case filterChildren (named fieldname) elt of + [] -> return () + [z] -> getInlines z >>= addMeta fieldname + zs -> mapM getInlines zs >>= addMeta fieldname addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () addMeta field val = modify (setMeta field val) @@ -718,11 +724,6 @@ parseBlock (Elem e) = "attribution" -> return mempty "titleabbrev" -> return mempty "authorinitials" -> return mempty - "title" -> checkInMeta getTitle - "author" -> checkInMeta getAuthor - "authorgroup" -> checkInMeta getAuthorGroup - "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") - "date" -> checkInMeta getDate "bibliography" -> sect 0 "bibliodiv" -> sect 1 "biblioentry" -> parseMixed para (elContent e) @@ -788,8 +789,8 @@ parseBlock (Elem e) = "figure" -> getFigure e "mediaobject" -> para <$> getMediaobject e "caption" -> return mempty - "info" -> metaBlock - "articleinfo" -> metaBlock + "info" -> addMetadataFromElement e + "articleinfo" -> addMetadataFromElement e "sectioninfo" -> return mempty -- keywords & other metadata "refsectioninfo" -> return mempty -- keywords & other metadata "refsect1info" -> return mempty -- keywords & other metadata @@ -803,10 +804,11 @@ parseBlock (Elem e) = "chapterinfo" -> return mempty -- keywords & other metadata "glossaryinfo" -> return mempty -- keywords & other metadata "appendixinfo" -> return mempty -- keywords & other metadata - "bookinfo" -> metaBlock + "bookinfo" -> addMetadataFromElement e "article" -> modify (\st -> st{ dbBook = False }) >> - getBlocks e - "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e + addMetadataFromElement e >> getBlocks e + "book" -> modify (\st -> st{ dbBook = True }) >> + addMetadataFromElement e >> getBlocks e "table" -> parseTable "informaltable" -> parseTable "informalexample" -> divWith ("", ["informalexample"], []) <$> @@ -816,6 +818,8 @@ parseBlock (Elem e) = "screen" -> codeBlockWithLang "programlisting" -> codeBlockWithLang "?xml" -> return mempty + "title" -> return mempty -- handled in parent element + "subtitle" -> return mempty -- handled in parent element _ -> getBlocks e where parseMixed container conts = do let (ils,rest) = break isBlockElement conts @@ -857,19 +861,6 @@ parseBlock (Elem e) = terms' <- mapM getInlines terms items' <- mapM getBlocks items return (mconcat $ intersperse (str "; ") terms', items') - getTitle = do - tit <- getInlines e - subtit <- case filterChild (named "subtitle") e of - Just s -> (text ": " <>) <$> - getInlines s - Nothing -> return mempty - addMeta "title" (tit <> subtit) - - getAuthor = (:[]) <$> getInlines e >>= addMeta "author" - getAuthorGroup = do - let terms = filterChildren (named "author") e - mapM getInlines terms >>= addMeta "author" - getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x caption <- case filterChild isCaption e of @@ -935,7 +926,6 @@ parseBlock (Elem e) = modify $ \st -> st{ dbSectionLevel = n - 1 } return $ headerWith (ident,[],[]) n' headerText <> b lineItems = mapM getInlines $ filterChildren (named "line") e - metaBlock = acceptingMetadata (getBlocks e) >> return mempty getInlines :: PandocMonad m => Element -> DB m Inlines getInlines e' = (trimInlines . mconcat) <$> diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 49ea71601..0be363f3d 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -183,14 +183,13 @@ blocksToDefinitions' defAcc acc pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) in blocksToDefinitions' (pair : defAcc) acc blks -blocksToDefinitions' defAcc acc +blocksToDefinitions' ((defTerm, defItems):defs) acc (Div (ident2, classes2, kvs2) blks2 : blks) - | (not . null) defAcc && "Definition" `elem` classes2 = + | "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) defItems2 = case remainingAttr2 == ("", [], []) of True -> blks2 False -> [Div remainingAttr2 blks2] - ((defTerm, defItems):defs) = defAcc defAcc' = case null defItems of True -> (defTerm, [defItems2]) : defs False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 4c4c06073..b4e52de14 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -785,7 +785,7 @@ So we do this in a number of steps. If we encounter the fldchar begin tag, we start open a fldchar state variable (see state above). We add the instrtext to it as FieldInfo. Then we close that and start adding the runs when we get to separate. Then when we get to end, we produce -the Field type with approriate FieldInfo and Runs. +the Field type with appropriate FieldInfo and Runs. -} elemToParPart ns element | isElem ns "w" "r" element @@ -1056,8 +1056,10 @@ elemToRunStyle ns element parentStyle | Just rPr <- findChildByName ns "w" "rPr" element = RunStyle { - isBold = checkOnOff ns rPr (elemName ns "w" "b") - , isItalic = checkOnOff ns rPr (elemName ns "w" "i") + isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` + checkOnOff ns rPr (elemName ns "w" "bCs") + , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` + checkOnOff ns rPr (elemName ns "w" "iCs") , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") , rVertAlign = @@ -1153,8 +1155,9 @@ getSymChar :: NameSpaces -> Element -> RunElem getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = - let [(char, _)] = readLitChar ("\\x" ++ s) in - TextRun . maybe "" (:[]) $ getUnicode font char + case readLitChar ("\\x" ++ s) of + [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element getFont = stringToFont =<< findAttrByName ns "w" "font" element diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index c26447641..bfc3fc3ee 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -73,7 +73,7 @@ readEPUB opts bytes = case toArchiveOrFail bytes of -- runEPUB :: Except PandocError a -> Either PandocError a -- runEPUB = runExcept --- Note that internal reference are aggresively normalised so that all ids +-- Note that internal reference are aggressively normalised so that all ids -- are of the form "filename#id" -- archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 32a1ba5a6..b06e07a80 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Definition +import Text.Pandoc.Readers.LaTeX (rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options ( Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, - Ext_native_spans, Ext_raw_html, Ext_line_blocks), + Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex), ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) @@ -102,7 +104,8 @@ readHtml opts inp = do (m:_) -> messageString m result <- flip runReaderT def $ runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty []) + (HTMLState def{ stateOptions = opts } + [] Nothing Set.empty M.empty [] M.empty) "source" tags case result of Right doc -> return doc @@ -124,7 +127,8 @@ data HTMLState = baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String, - logMessages :: [LogMessage] + logMessages :: [LogMessage], + macros :: M.Map Text Macro } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -659,6 +663,7 @@ inline = choice , pCode , pSpan , pMath False + , pScriptMath , pRawHtmlInline ] @@ -745,18 +750,18 @@ pLink = try $ do let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ maybeFromAttrib "id" tag let cls = words $ T.unpack $ fromAttrib "class" tag - lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") + lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of Nothing -> - return $ B.spanWith (uid, cls, []) lab + return $ extractSpaces (B.spanWith (uid, cls, [])) lab Just url' -> do mbBaseHref <- baseHref <$> getState let url = case (parseURIReference url', mbBaseHref) of (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) _ -> url' - return $ B.linkWith (uid, cls, []) (escapeURI url) title lab + return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab pImage :: PandocMonad m => TagParser m Inlines pImage = do @@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)] toStringAttr = map go where go (x,y) = (T.unpack x, T.unpack y) +pScriptMath :: PandocMonad m => TagParser m Inlines +pScriptMath = try $ do + TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True) + isdisplay <- case lookup "type" attr' of + Just x | "math/tex" `T.isPrefixOf` x + -> return $ "display" `T.isSuffixOf` x + _ -> mzero + contents <- T.unpack . innerText <$> + manyTill pAnyTag (pSatisfy (matchTagClose "script")) + return $ (if isdisplay then B.displayMath else B.math) contents + pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True) @@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t) mconcat <$> manyTill parser (pCloses tagtype <|> eof) --- parses p, preceeded by an optional opening tag +-- parses p, preceded by an optional opening tag -- and followed by an optional closing tags pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do @@ -907,9 +923,25 @@ pTagContents = <|> pStr <|> pSpace <|> smartPunctuation pTagContents + <|> pRawTeX <|> pSymbol <|> pBad +pRawTeX :: PandocMonad m => InlinesParser m Inlines +pRawTeX = do + lookAhead $ try $ do + char '\\' + choice $ map (try . string) ["begin", "eqref", "ref"] + guardEnabled Ext_raw_tex + inp <- getInput + st <- getState + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + case res of + Left _ -> mzero + Right (contents, raw) -> do + _ <- count (length raw) anyChar + return $ B.rawInline "tex" contents + pStr :: PandocMonad m => InlinesParser m Inlines pStr = do result <- many1 $ satisfy $ \c -> @@ -923,6 +955,7 @@ isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True isSpecial '$' = True +isSpecial '\\' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True @@ -1249,6 +1282,10 @@ isSpace _ = False -- Instances +instance HasMacros HTMLState where + extractMacros = macros + updateMacros f st = st{ macros = f $ macros st } + instance HasIdentifierList HTMLState where extractIdentifierList = identifiers updateIdentifierList f s = s{ identifiers = f (identifiers s) } @@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} getLastStrPos = getLastStrPos . parserState --- For now we need a special verison here; the one in Shared has String type +-- For now we need a special version here; the one in Shared has String type renderTags' :: [Tag Text] -> Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 967037e4e..072bab350 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -44,11 +44,7 @@ readHaddockEither :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Either PandocError Pandoc readHaddockEither _opts = -#if MIN_VERSION_haddock_library(1,2,0) - Right . B.doc . docHToBlocks . _doc . parseParas -#else - Right . B.doc . docHToBlocks . parseParas -#endif + Right . B.doc . docHToBlocks . _doc . parseParas Nothing docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks d' = @@ -68,10 +64,8 @@ docHToBlocks d' = DocEmphasis _ -> inlineFallback DocMonospaced _ -> inlineFallback DocBold _ -> inlineFallback -#if MIN_VERSION_haddock_library(1,4,0) DocMathInline _ -> inlineFallback DocMathDisplay _ -> inlineFallback -#endif DocHeader h -> B.header (headerLevel h) (docHToInlines False $ headerTitle h) DocUnorderedList items -> B.bulletList (map docHToBlocks items) @@ -87,7 +81,6 @@ docHToBlocks d' = DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) DocExamples es -> mconcat $ map (\e -> makeExample ">>>" (exampleExpression e) (exampleResult e)) es -#if MIN_VERSION_haddock_library(1,5,0) DocTable H.Table{ tableHeaderRows = headerRows , tableBodyRows = bodyRows } @@ -100,7 +93,6 @@ docHToBlocks d' = colspecs = replicate (maximum (map length body)) (AlignDefault, 0.0) in B.table mempty colspecs header body -#endif where inlineFallback = B.plain $ docHToInlines False d' consolidatePlains = B.fromList . consolidatePlains' . B.toList @@ -133,10 +125,8 @@ docHToInlines isCode d' = DocMonospaced (DocString s) -> B.code s DocMonospaced d -> docHToInlines True d DocBold d -> B.strong (docHToInlines isCode d) -#if MIN_VERSION_haddock_library(1,4,0) DocMathInline s -> B.math s DocMathDisplay s -> B.displayMath s -#endif DocHeader _ -> mempty DocUnorderedList _ -> mempty DocOrderedList _ -> mempty @@ -149,9 +139,7 @@ docHToInlines isCode d' = DocAName s -> B.spanWith (s,["anchor"],[]) mempty DocProperty _ -> mempty DocExamples _ -> mempty -#if MIN_VERSION_haddock_library(1,5,0) DocTable _ -> mempty -#endif -- | Create an 'Example', stripping superfluous characters as appropriate makeExample :: String -> String -> [String] -> Blocks diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 59af76d23..695c86b5d 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -191,7 +191,7 @@ parseBlock (Elem e) = listType -> do let start = fromMaybe 1 $ (strContent <$> (filterElement (named "list-item") e - >>= filterElement (named "lable"))) + >>= filterElement (named "label"))) >>= safeRead orderedListWith (start, parseListStyleType listType, DefaultDelim) <$> listitems diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 39dffde76..7c5619165 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -47,8 +47,7 @@ import Prelude import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Control.Monad.Trans (lift) -import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper) +import Data.Char (isDigit, isLetter, toLower, toUpper) import Data.Default import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M @@ -63,7 +62,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, readFileFromDirs, report, setResourcePath, setTranslations, translateTerm, trace) -import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -71,12 +70,15 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), - Tok (..), TokType (..)) + ArgSpec (..), Tok (..), TokType (..)) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, + babelLangToBCP47) import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk -import Text.Parsec.Pos import qualified Text.Pandoc.Builder as B +import qualified Data.Text.Normalize as Normalize -- for debugging: -- import Text.Pandoc.Extensions (getDefaultExtensions) @@ -137,482 +139,49 @@ resolveRefs _ x = x -- Left e -> error (show e) -- Right r -> return r -newtype HeaderNum = HeaderNum [Int] - deriving (Show) - -renderHeaderNum :: HeaderNum -> String -renderHeaderNum (HeaderNum xs) = - intercalate "." (map show xs) - -incrementHeaderNum :: Int -> HeaderNum -> HeaderNum -incrementHeaderNum level (HeaderNum ns) = HeaderNum $ - case reverse (take level (ns ++ repeat 0)) of - (x:xs) -> reverse (x+1 : xs) - [] -> [] -- shouldn't happen - -data LaTeXState = LaTeXState{ sOptions :: ReaderOptions - , sMeta :: Meta - , sQuoteContext :: QuoteContext - , sMacros :: M.Map Text Macro - , sContainers :: [String] - , sHeaders :: M.Map Inlines String - , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String - , sVerbatimMode :: Bool - , sCaption :: Maybe Inlines - , sInListItem :: Bool - , sInTableCell :: Bool - , sLastHeaderNum :: HeaderNum - , sLabels :: M.Map String [Inline] - , sHasChapters :: Bool - , sToggles :: M.Map String Bool - } - deriving Show - -defaultLaTeXState :: LaTeXState -defaultLaTeXState = LaTeXState{ sOptions = def - , sMeta = nullMeta - , sQuoteContext = NoQuote - , sMacros = M.empty - , sContainers = [] - , sHeaders = M.empty - , sLogMessages = [] - , sIdentifiers = Set.empty - , sVerbatimMode = False - , sCaption = Nothing - , sInListItem = False - , sInTableCell = False - , sLastHeaderNum = HeaderNum [] - , sLabels = M.empty - , sHasChapters = False - , sToggles = M.empty - } - -instance PandocMonad m => HasQuoteContext LaTeXState m where - getQuoteContext = sQuoteContext <$> getState - withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = sQuoteContext oldState - setState oldState { sQuoteContext = context } - result <- parser - newState <- getState - setState newState { sQuoteContext = oldQuoteContext } - return result - -instance HasLogMessages LaTeXState where - addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } - getLogMessages st = reverse $ sLogMessages st - -instance HasIdentifierList LaTeXState where - extractIdentifierList = sIdentifiers - updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } - -instance HasIncludeFiles LaTeXState where - getIncludeFiles = sContainers - addIncludeFile f s = s{ sContainers = f : sContainers s } - dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } - -instance HasHeaderMap LaTeXState where - extractHeaderMap = sHeaders - updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } - -instance HasMacros LaTeXState where - extractMacros st = sMacros st - updateMacros f st = st{ sMacros = f (sMacros st) } - -instance HasReaderOptions LaTeXState where - extractReaderOptions = sOptions - -instance HasMeta LaTeXState where - setMeta field val st = - st{ sMeta = setMeta field val $ sMeta st } - deleteMeta field st = - st{ sMeta = deleteMeta field $ sMeta st } - -instance Default LaTeXState where - def = defaultLaTeXState - -type LP m = ParserT [Tok] LaTeXState m - -withVerbatimMode :: PandocMonad m => LP m a -> LP m a -withVerbatimMode parser = do - updateState $ \st -> st{ sVerbatimMode = True } - result <- parser - updateState $ \st -> st{ sVerbatimMode = False } - return result - -rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => LP m a -> LP m a -> ParserT String s m (a, String) -rawLaTeXParser parser valParser = do - inp <- getInput - let toks = tokenize "source" $ T.pack inp - pstate <- getState - let lstate = def{ sOptions = extractReaderOptions pstate } - let lstate' = lstate { sMacros = extractMacros pstate } - let rawparser = (,) <$> withRaw valParser <*> getState - res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks - case res' of - Left _ -> mzero - Right toks' -> do - res <- lift $ runParserT (do doMacros 0 - -- retokenize, applying macros - ts <- many (satisfyTok (const True)) - setInput ts - rawparser) - lstate' "chunk" toks' - case res of - Left _ -> mzero - Right ((val, raw), st) -> do - updateState (updateMacros (sMacros st <>)) - _ <- takeP (T.length (untokenize toks')) - return (val, T.unpack (untokenize raw)) - -applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => String -> ParserT String s m String -applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = doMacros 0 *> - (toksToString <$> many (satisfyTok (const True))) - pstate <- getState - let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } - res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) - case res of - Left e -> fail (show e) - Right s' -> return s' rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT String s m String rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) - -- we don't want to apply newly defined latex macros to their own - -- definitions: - snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks + snd <$> (rawLaTeXParser False macroDef blocks + <|> (rawLaTeXParser True + (do choice (map controlSeq + ["include", "input", "subfile", "usepackage"]) + skipMany opt + braced + return mempty) blocks) + <|> rawLaTeXParser True + (environment <|> blockCommand) + (mconcat <$> (many (block <|> beginOrEndCommand)))) + +-- See #4667 for motivation; sometimes people write macros +-- that just evaluate to a begin or end command, which blockCommand +-- won't accept. +beginOrEndCommand :: PandocMonad m => LP m Blocks +beginOrEndCommand = try $ do + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name == "begin" || name == "end" + (envname, rawargs) <- withRaw braced + if M.member (untokenize envname) + (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) + then mzero + else return $ rawBlock "latex" + (T.unpack (txt <> untokenize rawargs)) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT String s m String rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) - snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines + snd <$> ( rawLaTeXParser True + (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) + inlines + <|> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines) inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) - fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines - -tokenize :: SourceName -> Text -> [Tok] -tokenize sourcename = totoks (initialPos sourcename) - -totoks :: SourcePos -> Text -> [Tok] -totoks pos t = - case T.uncons t of - Nothing -> [] - Just (c, rest) - | c == '\n' -> - Tok pos Newline "\n" - : totoks (setSourceColumn (incSourceLine pos 1) 1) rest - | isSpaceOrTab c -> - let (sps, rest') = T.span isSpaceOrTab t - in Tok pos Spaces sps - : totoks (incSourceColumn pos (T.length sps)) - rest' - | isAlphaNum c -> - let (ws, rest') = T.span isAlphaNum t - in Tok pos Word ws - : totoks (incSourceColumn pos (T.length ws)) rest' - | c == '%' -> - let (cs, rest') = T.break (== '\n') rest - in Tok pos Comment ("%" <> cs) - : totoks (incSourceColumn pos (1 + T.length cs)) rest' - | c == '\\' -> - case T.uncons rest of - Nothing -> [Tok pos (CtrlSeq " ") "\\"] - Just (d, rest') - | isLetterOrAt d -> - -- \makeatletter is common in macro defs; - -- ideally we should make tokenization sensitive - -- to \makeatletter and \makeatother, but this is - -- probably best for now - let (ws, rest'') = T.span isLetterOrAt rest - (ss, rest''') = T.span isSpaceOrTab rest'' - in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss) - : totoks (incSourceColumn pos - (1 + T.length ws + T.length ss)) rest''' - | isSpaceOrTab d || d == '\n' -> - let (w1, r1) = T.span isSpaceOrTab rest - (w2, (w3, r3)) = case T.uncons r1 of - Just ('\n', r2) - -> (T.pack "\n", - T.span isSpaceOrTab r2) - _ -> (mempty, (mempty, r1)) - ws = "\\" <> w1 <> w2 <> w3 - in case T.uncons r3 of - Just ('\n', _) -> - Tok pos (CtrlSeq " ") ("\\" <> w1) - : totoks (incSourceColumn pos (T.length ws)) - r1 - _ -> - Tok pos (CtrlSeq " ") ws - : totoks (incSourceColumn pos (T.length ws)) - r3 - | otherwise -> - Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d]) - : totoks (incSourceColumn pos 2) rest' - | c == '#' -> - let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest - in case safeRead (T.unpack t1) of - Just i -> - Tok pos (Arg i) ("#" <> t1) - : totoks (incSourceColumn pos (1 + T.length t1)) t2 - Nothing -> - Tok pos Symbol "#" - : totoks (incSourceColumn pos 1) t2 - | c == '^' -> - case T.uncons rest of - Just ('^', rest') -> - case T.uncons rest' of - Just (d, rest'') - | isLowerHex d -> - case T.uncons rest'' of - Just (e, rest''') | isLowerHex e -> - Tok pos Esc2 (T.pack ['^','^',d,e]) - : totoks (incSourceColumn pos 4) rest''' - _ -> - Tok pos Esc1 (T.pack ['^','^',d]) - : totoks (incSourceColumn pos 3) rest'' - | d < '\128' -> - Tok pos Esc1 (T.pack ['^','^',d]) - : totoks (incSourceColumn pos 3) rest'' - _ -> Tok pos Symbol "^" : - Tok (incSourceColumn pos 1) Symbol "^" : - totoks (incSourceColumn pos 2) rest' - _ -> Tok pos Symbol "^" - : totoks (incSourceColumn pos 1) rest - | otherwise -> - Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest - -isSpaceOrTab :: Char -> Bool -isSpaceOrTab ' ' = True -isSpaceOrTab '\t' = True -isSpaceOrTab _ = False - -isLetterOrAt :: Char -> Bool -isLetterOrAt '@' = True -isLetterOrAt c = isLetter c - -isLowerHex :: Char -> Bool -isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' - -untokenize :: [Tok] -> Text -untokenize = mconcat . map untoken - -untoken :: Tok -> Text -untoken (Tok _ _ t) = t - -satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok -satisfyTok f = - try $ do - res <- tokenPrim (T.unpack . untoken) updatePos matcher - doMacros 0 -- apply macros on remaining input stream - return res - where matcher t | f t = Just t - | otherwise = Nothing - updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos - updatePos _spos _ (Tok pos _ _ : _) = pos - updatePos spos _ [] = incSourceColumn spos 1 - -doMacros :: PandocMonad m => Int -> LP m () -doMacros n = do - verbatimMode <- sVerbatimMode <$> getState - unless verbatimMode $ do - inp <- getInput - case inp of - Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros spos name ts - Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros spos ("end" <> name) ts - Tok _ (CtrlSeq "expandafter") _ : t : ts - -> do setInput ts - doMacros n - getInput >>= setInput . combineTok t - Tok spos (CtrlSeq name) _ : ts - -> handleMacros spos name ts - _ -> return () - where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) - | T.all isLetterOrAt w = - Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts - where (x1, x2) = T.break isSpaceOrTab x - combineTok t ts = t:ts - handleMacros spos name ts = do - macros <- sMacros <$> getState - case M.lookup name macros of - Nothing -> return () - Just (Macro expansionPoint numargs optarg newtoks) -> do - setInput ts - let getarg = try $ spaces >> bracedOrToken - args <- case optarg of - Nothing -> count numargs getarg - Just o -> - (:) <$> option o bracketedToks - <*> count (numargs - 1) getarg - -- first boolean param is true if we're tokenizing - -- an argument (in which case we don't want to - -- expand #1 etc.) - let addTok False (Tok _ (Arg i) _) acc | i > 0 - , i <= numargs = - foldr (addTok True) acc (args !! (i - 1)) - -- add space if needed after control sequence - -- see #4007 - addTok _ (Tok _ (CtrlSeq x) txt) - acc@(Tok _ Word _ : _) - | not (T.null txt) && - isLetter (T.last txt) = - Tok spos (CtrlSeq x) (txt <> " ") : acc - addTok _ t acc = setpos spos t : acc - ts' <- getInput - setInput $ foldr (addTok False) ts' newtoks - case expansionPoint of - ExpandWhenUsed -> - if n > 20 -- detect macro expansion loops - then throwError $ PandocMacroLoop (T.unpack name) - else doMacros (n + 1) - ExpandWhenDefined -> return () - - -setpos :: SourcePos -> Tok -> Tok -setpos spos (Tok _ tt txt) = Tok spos tt txt - -anyControlSeq :: PandocMonad m => LP m Tok -anyControlSeq = satisfyTok isCtrlSeq - where isCtrlSeq (Tok _ (CtrlSeq _) _) = True - isCtrlSeq _ = False - -anySymbol :: PandocMonad m => LP m Tok -anySymbol = satisfyTok isSym - where isSym (Tok _ Symbol _) = True - isSym _ = False - -spaces :: PandocMonad m => LP m () -spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) - -spaces1 :: PandocMonad m => LP m () -spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) - -tokTypeIn :: [TokType] -> Tok -> Bool -tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes - -controlSeq :: PandocMonad m => Text -> LP m Tok -controlSeq name = satisfyTok isNamed - where isNamed (Tok _ (CtrlSeq n) _) = n == name - isNamed _ = False - -symbol :: PandocMonad m => Char -> LP m Tok -symbol c = satisfyTok isc - where isc (Tok _ Symbol d) = case T.uncons d of - Just (c',_) -> c == c' - _ -> False - isc _ = False - -symbolIn :: PandocMonad m => [Char] -> LP m Tok -symbolIn cs = satisfyTok isInCs - where isInCs (Tok _ Symbol d) = case T.uncons d of - Just (c,_) -> c `elem` cs - _ -> False - isInCs _ = False - -sp :: PandocMonad m => LP m () -sp = whitespace <|> endline - -whitespace :: PandocMonad m => LP m () -whitespace = () <$ satisfyTok isSpaceTok - where isSpaceTok (Tok _ Spaces _) = True - isSpaceTok _ = False - -newlineTok :: PandocMonad m => LP m () -newlineTok = () <$ satisfyTok isNewlineTok - -isNewlineTok :: Tok -> Bool -isNewlineTok (Tok _ Newline _) = True -isNewlineTok _ = False - -comment :: PandocMonad m => LP m () -comment = () <$ satisfyTok isCommentTok - where isCommentTok (Tok _ Comment _) = True - isCommentTok _ = False - -anyTok :: PandocMonad m => LP m Tok -anyTok = satisfyTok (const True) - -endline :: PandocMonad m => LP m () -endline = try $ do - newlineTok - lookAhead anyTok - notFollowedBy blankline - -blankline :: PandocMonad m => LP m () -blankline = try $ skipMany whitespace *> newlineTok - -primEscape :: PandocMonad m => LP m Char -primEscape = do - Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) - case toktype of - Esc1 -> case T.uncons (T.drop 2 t) of - Just (c, _) - | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) - | otherwise -> return (chr (ord c + 64)) - Nothing -> fail "Empty content of Esc1" - Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of - Just x -> return (chr x) - Nothing -> fail $ "Could not read: " ++ T.unpack t - _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen - -bgroup :: PandocMonad m => LP m Tok -bgroup = try $ do - skipMany sp - symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" - -egroup :: PandocMonad m => LP m Tok -egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" - -grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a -grouped parser = try $ do - bgroup - -- first we check for an inner 'grouped', because - -- {{a,b}} should be parsed the same as {a,b} - try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) - -braced :: PandocMonad m => LP m [Tok] -braced = bgroup *> braced' 1 - where braced' (n :: Int) = - handleEgroup n <|> handleBgroup n <|> handleOther n - handleEgroup n = do - t <- egroup - if n == 1 - then return [] - else (t:) <$> braced' (n - 1) - handleBgroup n = do - t <- bgroup - (t:) <$> braced' (n + 1) - handleOther n = do - t <- anyTok - (t:) <$> braced' n - -bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a -bracketed parser = try $ do - symbol '[' - mconcat <$> manyTill parser (symbol ']') - -dimenarg :: PandocMonad m => LP m Text -dimenarg = try $ do - ch <- option False $ True <$ symbol '=' - Tok _ _ s <- satisfyTok isWordTok - guard $ T.take 2 (T.reverse s) `elem` - ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - let num = T.take (T.length s - 2) s - guard $ T.length num > 0 - guard $ T.all isDigit num - return $ T.pack ['=' | ch] <> s + fst <$> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines -- inline elements: @@ -625,13 +194,6 @@ regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol isRegularSymbol _ = False isSpecial c = c `Set.member` specialChars -specialChars :: Set.Set Char -specialChars = Set.fromList "#$%&~_^\\{}" - -isWordTok :: Tok -> Bool -isWordTok (Tok _ Word _) = True -isWordTok _ = False - inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do ils <- grouped inline @@ -678,7 +240,7 @@ dosiunitx = do skipopts value <- tok valueprefix <- option "" $ bracketed tok - unit <- inlineCommand' <|> tok + unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok let emptyOr160 "" = "" emptyOr160 _ = "\160" return . mconcat $ [valueprefix, @@ -687,11 +249,187 @@ dosiunitx = do emptyOr160 unit, unit] --- siunitx's \square command -dosquare :: PandocMonad m => LP m Inlines -dosquare = do - unit <- inlineCommand' <|> tok - return . mconcat $ [unit, "\178"] +siUnit :: PandocMonad m => LP m Inlines +siUnit = do + Tok _ (CtrlSeq name) _ <- anyControlSeq + if name == "square" + then do + unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok + return . mconcat $ [unit, "\178"] + else + case M.lookup name siUnitMap of + Just il -> return il + Nothing -> mzero + +siUnitMap :: M.Map Text Inlines +siUnitMap = M.fromList + [ ("fg", str "fg") + , ("pg", str "pg") + , ("ng", str "ng") + , ("ug", str "μg") + , ("mg", str "mg") + , ("g", str "g") + , ("kg", str "kg") + , ("amu", str "u") + , ("pm", str "pm") + , ("nm", str "nm") + , ("um", str "μm") + , ("mm", str "mm") + , ("cm", str "cm") + , ("dm", str "dm") + , ("m", str "m") + , ("km", str "km") + , ("as", str "as") + , ("fs", str "fs") + , ("ps", str "ps") + , ("ns", str "ns") + , ("us", str "μs") + , ("ms", str "ms") + , ("s", str "s") + , ("fmol", str "fmol") + , ("pmol", str "pmol") + , ("nmol", str "nmol") + , ("umol", str "μmol") + , ("mmol", str "mmol") + , ("mol", str "mol") + , ("kmol", str "kmol") + , ("pA", str "pA") + , ("nA", str "nA") + , ("uA", str "μA") + , ("mA", str "mA") + , ("A", str "A") + , ("kA", str "kA") + , ("ul", str "μl") + , ("ml", str "ml") + , ("l", str "l") + , ("hl", str "hl") + , ("uL", str "μL") + , ("mL", str "mL") + , ("L", str "L") + , ("hL", str "hL") + , ("mHz", str "mHz") + , ("Hz", str "Hz") + , ("kHz", str "kHz") + , ("MHz", str "MHz") + , ("GHz", str "GHz") + , ("THz", str "THz") + , ("mN", str "mN") + , ("N", str "N") + , ("kN", str "kN") + , ("MN", str "MN") + , ("Pa", str "Pa") + , ("kPa", str "kPa") + , ("MPa", str "MPa") + , ("GPa", str "GPa") + , ("mohm", str "mΩ") + , ("kohm", str "kΩ") + , ("Mohm", str "MΩ") + , ("pV", str "pV") + , ("nV", str "nV") + , ("uV", str "μV") + , ("mV", str "mV") + , ("V", str "V") + , ("kV", str "kV") + , ("W", str "W") + , ("uW", str "μW") + , ("mW", str "mW") + , ("kW", str "kW") + , ("MW", str "MW") + , ("GW", str "GW") + , ("J", str "J") + , ("uJ", str "μJ") + , ("mJ", str "mJ") + , ("kJ", str "kJ") + , ("eV", str "eV") + , ("meV", str "meV") + , ("keV", str "keV") + , ("MeV", str "MeV") + , ("GeV", str "GeV") + , ("TeV", str "TeV") + , ("kWh", str "kWh") + , ("F", str "F") + , ("fF", str "fF") + , ("pF", str "pF") + , ("K", str "K") + , ("dB", str "dB") + , ("angstrom", str "Å") + , ("arcmin", str "′") + , ("arcminute", str "′") + , ("arcsecond", str "″") + , ("astronomicalunit", str "ua") + , ("atomicmassunit", str "u") + , ("atto", str "a") + , ("bar", str "bar") + , ("barn", str "b") + , ("becquerel", str "Bq") + , ("bel", str "B") + , ("candela", str "cd") + , ("celsius", str "°C") + , ("centi", str "c") + , ("coulomb", str "C") + , ("dalton", str "Da") + , ("day", str "d") + , ("deca", str "d") + , ("deci", str "d") + , ("decibel", str "db") + , ("degreeCelsius",str "°C") + , ("degree", str "°") + , ("deka", str "d") + , ("electronvolt", str "eV") + , ("exa", str "E") + , ("farad", str "F") + , ("femto", str "f") + , ("giga", str "G") + , ("gram", str "g") + , ("hectare", str "ha") + , ("hecto", str "h") + , ("henry", str "H") + , ("hertz", str "Hz") + , ("hour", str "h") + , ("joule", str "J") + , ("katal", str "kat") + , ("kelvin", str "K") + , ("kilo", str "k") + , ("kilogram", str "kg") + , ("knot", str "kn") + , ("liter", str "L") + , ("litre", str "l") + , ("lumen", str "lm") + , ("lux", str "lx") + , ("mega", str "M") + , ("meter", str "m") + , ("metre", str "m") + , ("milli", str "m") + , ("minute", str "min") + , ("mmHg", str "mmHg") + , ("mole", str "mol") + , ("nano", str "n") + , ("nauticalmile", str "M") + , ("neper", str "Np") + , ("newton", str "N") + , ("ohm", str "Ω") + , ("Pa", str "Pa") + , ("pascal", str "Pa") + , ("percent", str "%") + , ("per", str "/") + , ("peta", str "P") + , ("pico", str "p") + , ("radian", str "rad") + , ("second", str "s") + , ("siemens", str "S") + , ("sievert", str "Sv") + , ("steradian", str "sr") + , ("tera", str "T") + , ("tesla", str "T") + , ("tonne", str "t") + , ("volt", str "V") + , ("watt", str "W") + , ("weber", str "Wb") + , ("yocto", str "y") + , ("yotta", str "Y") + , ("zepto", str "z") + , ("zetta", str "Z") + ] lit :: String -> LP m Inlines lit = pure . str @@ -742,13 +480,31 @@ quoted' f starter ender = do cs -> cs) else lit startchs -enquote :: PandocMonad m => LP m Inlines -enquote = do +enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines +enquote starred mblang = do skipopts + let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let langspan = case lang of + Nothing -> id + Just l -> spanWith ("",[],[("lang", renderLang l)]) quoteContext <- sQuoteContext <$> getState - if quoteContext == InDoubleQuote - then singleQuoted <$> withQuoteContext InSingleQuote tok - else doubleQuoted <$> withQuoteContext InDoubleQuote tok + if starred || quoteContext == InDoubleQuote + then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok + else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok + +blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks +blockquote citations mblang = do + citePar <- if citations + then do + cs <- cites NormalCitation False + return $ para (cite cs mempty) + else return mempty + let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let langdiv = case lang of + Nothing -> id + Just l -> divWith ("",[],[("lang", renderLang l)]) + bs <- grouped block + return $ blockQuote . langdiv $ (bs <> citePar) doAcronym :: PandocMonad m => String -> LP m Inlines doAcronym form = do @@ -791,6 +547,16 @@ dolstinline :: PandocMonad m => LP m Inlines dolstinline = do options <- option [] keyvals let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage + doinlinecode classes + +domintinline :: PandocMonad m => LP m Inlines +domintinline = do + skipopts + cls <- toksToString <$> braced + doinlinecode [cls] + +doinlinecode :: PandocMonad m => [String] -> LP m Inlines +doinlinecode classes = do Tok _ Symbol t <- anySymbol marker <- case T.uncons t of Just (c, ts) | T.null ts -> return c @@ -803,246 +569,41 @@ dolstinline = do keyval :: PandocMonad m => LP m (String, String) keyval = try $ do Tok _ Word key <- satisfyTok isWordTok - let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= "," - isSpecSym _ = False optional sp - val <- option [] $ do + val <- option mempty $ do symbol '=' optional sp - braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym - <|> anyControlSeq) - optional sp + (untokenize <$> braced) <|> + (mconcat <$> many1 ( + (untokenize . snd <$> withRaw braced) + <|> + (untokenize <$> (many1 + (satisfyTok + (\t -> case t of + Tok _ Symbol "]" -> False + Tok _ Symbol "," -> False + Tok _ Symbol "{" -> False + Tok _ Symbol "}" -> False + _ -> True)))))) optional (symbol ',') optional sp - return (T.unpack key, T.unpack . untokenize $ val) + return (T.unpack key, T.unpack $ T.strip val) keyvals :: PandocMonad m => LP m [(String, String)] keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') -accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines -accent c f = try $ do +accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines +accent combiningAccent fallBack = try $ do ils <- tok case toList ils of - (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) - [Space] -> return $ str [c] - [] -> return $ str [c] + (Str (x:xs) : ys) -> return $ fromList $ + -- try to normalize to the combined character: + Str (T.unpack + (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent])) ++ xs) : ys + [Space] -> return $ str [fromMaybe combiningAccent fallBack] + [] -> return $ str [fromMaybe combiningAccent fallBack] _ -> return ils - -grave :: Char -> String -grave 'A' = "À" -grave 'E' = "È" -grave 'I' = "Ì" -grave 'O' = "Ò" -grave 'U' = "Ù" -grave 'a' = "à" -grave 'e' = "è" -grave 'i' = "ì" -grave 'o' = "ò" -grave 'u' = "ù" -grave c = [c] - -acute :: Char -> String -acute 'A' = "Á" -acute 'E' = "É" -acute 'I' = "Í" -acute 'O' = "Ó" -acute 'U' = "Ú" -acute 'Y' = "Ý" -acute 'a' = "á" -acute 'e' = "é" -acute 'i' = "í" -acute 'o' = "ó" -acute 'u' = "ú" -acute 'y' = "ý" -acute 'C' = "Ć" -acute 'c' = "ć" -acute 'L' = "Ĺ" -acute 'l' = "ĺ" -acute 'N' = "Ń" -acute 'n' = "ń" -acute 'R' = "Ŕ" -acute 'r' = "ŕ" -acute 'S' = "Ś" -acute 's' = "ś" -acute 'Z' = "Ź" -acute 'z' = "ź" -acute c = [c] - -circ :: Char -> String -circ 'A' = "Â" -circ 'E' = "Ê" -circ 'I' = "Î" -circ 'O' = "Ô" -circ 'U' = "Û" -circ 'a' = "â" -circ 'e' = "ê" -circ 'i' = "î" -circ 'o' = "ô" -circ 'u' = "û" -circ 'C' = "Ĉ" -circ 'c' = "ĉ" -circ 'G' = "Ĝ" -circ 'g' = "ĝ" -circ 'H' = "Ĥ" -circ 'h' = "ĥ" -circ 'J' = "Ĵ" -circ 'j' = "ĵ" -circ 'S' = "Ŝ" -circ 's' = "ŝ" -circ 'W' = "Ŵ" -circ 'w' = "ŵ" -circ 'Y' = "Ŷ" -circ 'y' = "ŷ" -circ c = [c] - -tilde :: Char -> String -tilde 'A' = "Ã" -tilde 'a' = "ã" -tilde 'O' = "Õ" -tilde 'o' = "õ" -tilde 'I' = "Ĩ" -tilde 'i' = "ĩ" -tilde 'U' = "Ũ" -tilde 'u' = "ũ" -tilde 'N' = "Ñ" -tilde 'n' = "ñ" -tilde c = [c] - -umlaut :: Char -> String -umlaut 'A' = "Ä" -umlaut 'E' = "Ë" -umlaut 'I' = "Ï" -umlaut 'O' = "Ö" -umlaut 'U' = "Ü" -umlaut 'a' = "ä" -umlaut 'e' = "ë" -umlaut 'i' = "ï" -umlaut 'o' = "ö" -umlaut 'u' = "ü" -umlaut c = [c] - -hungarumlaut :: Char -> String -hungarumlaut 'A' = "A̋" -hungarumlaut 'E' = "E̋" -hungarumlaut 'I' = "I̋" -hungarumlaut 'O' = "Ő" -hungarumlaut 'U' = "Ű" -hungarumlaut 'Y' = "ӳ" -hungarumlaut 'a' = "a̋" -hungarumlaut 'e' = "e̋" -hungarumlaut 'i' = "i̋" -hungarumlaut 'o' = "ő" -hungarumlaut 'u' = "ű" -hungarumlaut 'y' = "ӳ" -hungarumlaut c = [c] - -dot :: Char -> String -dot 'C' = "Ċ" -dot 'c' = "ċ" -dot 'E' = "Ė" -dot 'e' = "ė" -dot 'G' = "Ġ" -dot 'g' = "ġ" -dot 'I' = "İ" -dot 'Z' = "Ż" -dot 'z' = "ż" -dot c = [c] - -macron :: Char -> String -macron 'A' = "Ā" -macron 'E' = "Ē" -macron 'I' = "Ī" -macron 'O' = "Ō" -macron 'U' = "Ū" -macron 'a' = "ā" -macron 'e' = "ē" -macron 'i' = "ī" -macron 'o' = "ō" -macron 'u' = "ū" -macron c = [c] - -cedilla :: Char -> String -cedilla 'c' = "ç" -cedilla 'C' = "Ç" -cedilla 's' = "ş" -cedilla 'S' = "Ş" -cedilla 't' = "ţ" -cedilla 'T' = "Ţ" -cedilla 'e' = "ȩ" -cedilla 'E' = "Ȩ" -cedilla 'h' = "ḩ" -cedilla 'H' = "Ḩ" -cedilla 'o' = "o̧" -cedilla 'O' = "O̧" -cedilla c = [c] - -hacek :: Char -> String -hacek 'A' = "Ǎ" -hacek 'a' = "ǎ" -hacek 'C' = "Č" -hacek 'c' = "č" -hacek 'D' = "Ď" -hacek 'd' = "ď" -hacek 'E' = "Ě" -hacek 'e' = "ě" -hacek 'G' = "Ǧ" -hacek 'g' = "ǧ" -hacek 'H' = "Ȟ" -hacek 'h' = "ȟ" -hacek 'I' = "Ǐ" -hacek 'i' = "ǐ" -hacek 'j' = "ǰ" -hacek 'K' = "Ǩ" -hacek 'k' = "ǩ" -hacek 'L' = "Ľ" -hacek 'l' = "ľ" -hacek 'N' = "Ň" -hacek 'n' = "ň" -hacek 'O' = "Ǒ" -hacek 'o' = "ǒ" -hacek 'R' = "Ř" -hacek 'r' = "ř" -hacek 'S' = "Š" -hacek 's' = "š" -hacek 'T' = "Ť" -hacek 't' = "ť" -hacek 'U' = "Ǔ" -hacek 'u' = "ǔ" -hacek 'Z' = "Ž" -hacek 'z' = "ž" -hacek c = [c] - -ogonek :: Char -> String -ogonek 'a' = "ą" -ogonek 'e' = "ę" -ogonek 'o' = "ǫ" -ogonek 'i' = "į" -ogonek 'u' = "ų" -ogonek 'A' = "Ą" -ogonek 'E' = "Ę" -ogonek 'I' = "Į" -ogonek 'O' = "Ǫ" -ogonek 'U' = "Ų" -ogonek c = [c] - -breve :: Char -> String -breve 'A' = "Ă" -breve 'a' = "ă" -breve 'E' = "Ĕ" -breve 'e' = "ĕ" -breve 'G' = "Ğ" -breve 'g' = "ğ" -breve 'I' = "Ĭ" -breve 'i' = "ĭ" -breve 'O' = "Ŏ" -breve 'o' = "ŏ" -breve 'U' = "Ŭ" -breve 'u' = "ŭ" -breve c = [c] - -toksToString :: [Tok] -> String -toksToString = T.unpack . untokenize - mathDisplay :: String -> Inlines mathDisplay = displayMath . trim @@ -1119,7 +680,21 @@ citationLabel = do cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] cites mode multi = try $ do cits <- if multi - then many1 simpleCiteArgs + then do + multiprenote <- optionMaybe $ toList <$> paropt + multipostnote <- optionMaybe $ toList <$> paropt + let (pre, suf) = case (multiprenote, multipostnote) of + (Just s , Nothing) -> (mempty, s) + (Nothing , Just t) -> (mempty, t) + (Just s , Just t ) -> (s, t) + _ -> (mempty, mempty) + tempCits <- many1 simpleCiteArgs + case tempCits of + (k:ks) -> case ks of + (_:_) -> return $ ((addMprenote pre k):init ks) ++ + [addMpostnote suf (last ks)] + _ -> return [addMprenote pre (addMpostnote suf k)] + _ -> return [[]] else count 1 simpleCiteArgs let cs = concat cits return $ case mode of @@ -1127,6 +702,17 @@ cites mode multi = try $ do (c:rest) -> c {citationMode = mode} : rest [] -> [] _ -> map (\a -> a {citationMode = mode}) cs + where mprenote (k:ks) = (k:ks) ++ [Space] + mprenote _ = mempty + mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) + mpostnote _ = mempty + addMprenote mpn (k:ks) = + let mpnfinal = case citationPrefix k of + (_:_) -> mprenote mpn + _ -> mpn + in addPrefix mpnfinal (k:ks) + addMprenote _ _ = [] + addMpostnote = addSuffix . mpostnote citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do @@ -1181,22 +767,12 @@ tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' Tok _ _ t <- singleChar return (str (T.unpack t)) -singleChar :: PandocMonad m => LP m Tok -singleChar = try $ do - Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) - guard $ not $ toktype == Symbol && - T.any (`Set.member` specialChars) t - if T.length t > 1 - then do - let (t1, t2) = (T.take 1 t, T.drop 1 t) - inp <- getInput - setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp - return $ Tok pos toktype t1 - else return $ Tok pos toktype t - opt :: PandocMonad m => LP m Inlines opt = bracketed inline <|> (str . T.unpack <$> rawopt) +paropt :: PandocMonad m => LP m Inlines +paropt = parenWrapped inline + rawopt :: PandocMonad m => LP m Text rawopt = do inner <- untokenize <$> bracketedToks @@ -1204,30 +780,28 @@ rawopt = do return $ "[" <> inner <> "]" skipopts :: PandocMonad m => LP m () -skipopts = skipMany rawopt +skipopts = skipMany (overlaySpecification <|> void rawopt) -- opts in angle brackets are used in beamer -rawangle :: PandocMonad m => LP m () -rawangle = try $ do +overlaySpecification :: PandocMonad m => LP m () +overlaySpecification = try $ do symbol '<' - () <$ manyTill anyTok (symbol '>') - -skipangles :: PandocMonad m => LP m () -skipangles = skipMany rawangle - -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a -ignore raw = do - pos <- getPosition - report $ SkippedContent raw pos - return mempty - -withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) -withRaw parser = do - inp <- getInput - result <- parser - nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok) - let raw = takeWhile (/= nxt) inp - return (result, raw) + ts <- manyTill overlayTok (symbol '>') + guard $ case ts of + -- see issue #3368 + [Tok _ Word s] | T.all isLetter s -> s `elem` + ["beamer","presentation", "trans", + "handout","article", "second"] + _ -> True + +overlayTok :: PandocMonad m => LP m Tok +overlayTok = + satisfyTok (\t -> + case t of + Tok _ Word _ -> True + Tok _ Spaces _ -> True + Tok _ Symbol c -> c `elem` ["-","+","@","|",":",","] + _ -> False) inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" @@ -1275,6 +849,12 @@ inlineEnvironments = M.fromList [ , ("align*", mathEnvWith id (Just "aligned") "align*") , ("alignat", mathEnvWith id (Just "aligned") "alignat") , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") + , ("dmath", mathEnvWith id Nothing "dmath") + , ("dmath*", mathEnvWith id Nothing "dmath*") + , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") + , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") + , ("darray", mathEnvWith id (Just "aligned") "darray") + , ("darray*", mathEnvWith id (Just "aligned") "darray*") ] inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) @@ -1289,7 +869,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) , ("texttt", ttfamily) , ("sout", extractSpaces strikeout <$> tok) - , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer + , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer , ("lq", return (str "‘")) , ("rq", return (str "’")) , ("textquoteleft", return (str "‘")) @@ -1318,7 +898,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) , ("ensuremath", mathInline . toksToString <$> braced) - , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) + , ("texorpdfstring", (\x _ -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") , ("$", lit "$") @@ -1361,20 +941,32 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("copyright", lit "©") , ("textasciicircum", lit "^") , ("textasciitilde", lit "~") - , ("H", accent '\779' hungarumlaut) - , ("`", accent '`' grave) - , ("'", accent '\'' acute) - , ("^", accent '^' circ) - , ("~", accent '~' tilde) - , ("\"", accent '\776' umlaut) - , (".", accent '\775' dot) - , ("=", accent '\772' macron) - , ("c", accent '\807' cedilla) - , ("v", accent 'ˇ' hacek) - , ("u", accent '\774' breve) - , ("k", accent '\808' ogonek) - , ("textogonekcentered", accent '\808' ogonek) - , ("i", lit "i") + , ("H", accent '\779' Nothing) -- hungarumlaut + , ("`", accent '\768' (Just '`')) -- grave + , ("'", accent '\769' (Just '\'')) -- acute + , ("^", accent '\770' (Just '^')) -- circ + , ("~", accent '\771' (Just '~')) -- tilde + , ("\"", accent '\776' Nothing) -- umlaut + , (".", accent '\775' Nothing) -- dot + , ("=", accent '\772' Nothing) -- macron + , ("|", accent '\781' Nothing) -- vertical line above + , ("b", accent '\817' Nothing) -- macron below + , ("c", accent '\807' Nothing) -- cedilla + , ("G", accent '\783' Nothing) -- doublegrave + , ("h", accent '\777' Nothing) -- hookabove + , ("d", accent '\803' Nothing) -- dotbelow + , ("f", accent '\785' Nothing) -- inverted breve + , ("r", accent '\778' Nothing) -- ringabove + , ("t", accent '\865' Nothing) -- double inverted breve + , ("U", accent '\782' Nothing) -- double vertical line above + , ("v", accent '\780' Nothing) -- hacek + , ("u", accent '\774' Nothing) -- breve + , ("k", accent '\808' Nothing) -- ogonek + , ("textogonekcentered", accent '\808' Nothing) -- ogonek + , ("i", lit "ı") -- dotless i + , ("j", lit "ȷ") -- dotless j + , ("newtie", accent '\785' Nothing) -- inverted breve + , ("textcircled", accent '\8413' Nothing) -- combining circle , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState guard $ not inTableCell optional opt @@ -1392,17 +984,25 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("footnote", skipopts >> note <$> grouped block) , ("verb", doverb) , ("lstinline", dolstinline) + , ("mintinline", domintinline) , ("Verb", doverb) - , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> + , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> pure (link url "" (str url))) , ("href", (unescapeURL . toksToString <$> - braced <* optional sp) >>= \url -> + bracedUrl <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals src <- unescapeURL . T.unpack . removeDoubleQuotes . untokenize <$> braced mkImage options src) - , ("enquote", enquote) + , ("enquote*", enquote True Nothing) + , ("enquote", enquote False Nothing) + -- foreignquote is supposed to use native quote marks + , ("foreignquote*", braced >>= enquote True . Just . untokenize) + , ("foreignquote", braced >>= enquote False . Just . untokenize) + -- hypehnquote uses regular quotes + , ("hyphenquote*", braced >>= enquote True . Just . untokenize) + , ("hyphenquote", braced >>= enquote False . Just . untokenize) , ("figurename", doTerm Translations.Figure) , ("prefacename", doTerm Translations.Preface) , ("refname", doTerm Translations.References) @@ -1507,13 +1107,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("acsp", doAcronymPlural "abbrv") -- siuntix , ("SI", dosiunitx) - -- units of siuntix - , ("celsius", lit "°C") - , ("degreeCelsius", lit "°C") - , ("gram", lit "g") - , ("meter", lit "m") - , ("milli", lit "m") - , ("square", dosquare) -- hyphenat , ("bshyp", lit "\\\173") , ("fshyp", lit "/\173") @@ -1542,8 +1135,18 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("Rn", romanNumeralLower) -- babel , ("foreignlanguage", foreignlanguage) + -- include + , ("input", include "input") + -- plain tex stuff that should just be passed through as raw tex + , ("ifdim", ifdim) ] +ifdim :: PandocMonad m => LP m Inlines +ifdim = do + contents <- manyTill anyTok (controlSeq "fi") + return $ rawInline "latex" $ T.unpack $ + "\\ifdim" <> untokenize contents <> "\\fi" + makeUppercase :: Inlines -> Inlines makeUppercase = fromList . walk (alterStr (map toUpper)) . toList @@ -1693,7 +1296,6 @@ getRawCommand name txt = do "def" -> void $ manyTill anyTok braced _ -> do - skipangles skipopts option "" (try (optional sp *> dimenarg)) void $ many braced @@ -1818,7 +1420,6 @@ end_ t = try (do preamble :: PandocMonad m => LP m Blocks preamble = mempty <$ many preambleBlock where preambleBlock = spaces1 - <|> void include <|> void macroDef <|> void blockCommand <|> void braced @@ -1831,11 +1432,8 @@ paragraph = do then return mempty else return $ para x -include :: PandocMonad m => LP m Blocks -include = do - (Tok _ (CtrlSeq name) _) <- - controlSeq "include" <|> controlSeq "input" <|> - controlSeq "subfile" <|> controlSeq "usepackage" +include :: (PandocMonad m, Monoid a) => Text -> LP m a +include name = do skipMany opt fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," . untokenize) <$> braced @@ -1912,31 +1510,28 @@ letmacro = do optional $ symbol '=' spaces contents <- bracedOrToken - return (name, Macro ExpandWhenDefined 0 Nothing contents) + return (name, Macro ExpandWhenDefined [] Nothing contents) defmacro :: PandocMonad m => LP m (Text, Macro) defmacro = try $ do controlSeq "def" Tok _ (CtrlSeq name) _ <- anyControlSeq - numargs <- option 0 $ argSeq 1 + argspecs <- many (argspecArg <|> argspecPattern) -- we use withVerbatimMode, because macros are to be expanded -- at point of use, not point of definition contents <- withVerbatimMode bracedOrToken - return (name, Macro ExpandWhenUsed numargs Nothing contents) + return (name, Macro ExpandWhenUsed argspecs Nothing contents) --- Note: we don't yet support fancy things like #1.#2 -argSeq :: PandocMonad m => Int -> LP m Int -argSeq n = do +argspecArg :: PandocMonad m => LP m ArgSpec +argspecArg = do Tok _ (Arg i) _ <- satisfyTok isArgTok - guard $ i == n - argSeq (n+1) <|> return n + return $ ArgNum i -isArgTok :: Tok -> Bool -isArgTok (Tok _ (Arg _) _) = True -isArgTok _ = False - -bracedOrToken :: PandocMonad m => LP m [Tok] -bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar)) +argspecPattern :: PandocMonad m => LP m ArgSpec +argspecPattern = + Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> + (toktype' == Symbol || toktype' == Word) && + (txt /= "{" && txt /= "\\" && txt /= "}"))) newcommand :: PandocMonad m => LP m (Text, Macro) newcommand = do @@ -1950,6 +1545,7 @@ newcommand = do (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') spaces numargs <- option 0 $ try bracketedNum + let argspecs = map (\i -> ArgNum i) [1..numargs] spaces optarg <- option Nothing $ Just <$> try bracketedToks spaces @@ -1959,7 +1555,7 @@ newcommand = do case M.lookup name macros of Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos Nothing -> return () - return (name, Macro ExpandWhenUsed numargs optarg contents) + return (name, Macro ExpandWhenUsed argspecs optarg contents) newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) newenvironment = do @@ -1972,6 +1568,7 @@ newenvironment = do name <- untokenize <$> braced spaces numargs <- option 0 $ try bracketedNum + let argspecs = map (\i -> ArgNum i) [1..numargs] spaces optarg <- option Nothing $ Just <$> try bracketedToks spaces @@ -1983,13 +1580,8 @@ newenvironment = do case M.lookup name macros of Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos Nothing -> return () - return (name, Macro ExpandWhenUsed numargs optarg startcontents, - Macro ExpandWhenUsed 0 Nothing endcontents) - -bracketedToks :: PandocMonad m => LP m [Tok] -bracketedToks = do - symbol '[' - mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']') + return (name, Macro ExpandWhenUsed argspecs optarg startcontents, + Macro ExpandWhenUsed [] Nothing endcontents) bracketedNum :: PandocMonad m => LP m Int bracketedNum = do @@ -2003,11 +1595,13 @@ setCaption = do ils <- tok mblabel <- option Nothing $ try $ spaces >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ sCaption = Just ils' } + let capt = case mblabel of + Just lab -> let slab = stringify lab + ils' = ils <> spanWith + ("",[],[("label", slab)]) mempty + in (Just ils', Just slab) + Nothing -> (Just ils, Nothing) + updateState $ \st -> st{ sCaption = capt } return mempty looseItem :: PandocMonad m => LP m Blocks @@ -2018,28 +1612,27 @@ looseItem = do return mempty resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ sCaption = Nothing } +resetCaption = updateState $ \st -> st{ sCaption = (Nothing, Nothing) } -section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks -section starred (ident, classes, kvs) lvl = do +section :: PandocMonad m => Attr -> Int -> LP m Blocks +section (ident, classes, kvs) lvl = do skipopts contents <- grouped inline lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> toksToString <$> braced) - let classes' = if starred then "unnumbered" : classes else classes when (lvl == 0) $ updateState $ \st -> st{ sHasChapters = True } - unless starred $ do + unless ("unnumbered" `elem` classes) $ do hn <- sLastHeaderNum <$> getState hasChapters <- sHasChapters <$> getState let lvl' = lvl + if hasChapters then 1 else 0 - let num = incrementHeaderNum lvl' hn - updateState $ \st -> st{ sLastHeaderNum = num } - updateState $ \st -> st{ sLabels = M.insert lab - [Str (renderHeaderNum num)] - (sLabels st) } - attr' <- registerHeader (lab, classes', kvs) contents + let num = incrementDottedNum lvl' hn + updateState $ \st -> st{ sLastHeaderNum = num + , sLabels = M.insert lab + [Str (renderDottedNum num)] + (sLabels st) } + attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl contents blockCommand :: PandocMonad m => LP m Blocks @@ -2100,23 +1693,23 @@ blockCommands = M.fromList -- Koma-script metadata commands , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) -- sectioning - , ("part", section False nullAttr (-1)) - , ("part*", section True nullAttr (-1)) - , ("chapter", section False nullAttr 0) - , ("chapter*", section True ("",["unnumbered"],[]) 0) - , ("section", section False nullAttr 1) - , ("section*", section True ("",["unnumbered"],[]) 1) - , ("subsection", section False nullAttr 2) - , ("subsection*", section True ("",["unnumbered"],[]) 2) - , ("subsubsection", section False nullAttr 3) - , ("subsubsection*", section True ("",["unnumbered"],[]) 3) - , ("paragraph", section False nullAttr 4) - , ("paragraph*", section True ("",["unnumbered"],[]) 4) - , ("subparagraph", section False nullAttr 5) - , ("subparagraph*", section True ("",["unnumbered"],[]) 5) + , ("part", section nullAttr (-1)) + , ("part*", section nullAttr (-1)) + , ("chapter", section nullAttr 0) + , ("chapter*", section ("",["unnumbered"],[]) 0) + , ("section", section nullAttr 1) + , ("section*", section ("",["unnumbered"],[]) 1) + , ("subsection", section nullAttr 2) + , ("subsection*", section ("",["unnumbered"],[]) 2) + , ("subsubsection", section nullAttr 3) + , ("subsubsection*", section ("",["unnumbered"],[]) 3) + , ("paragraph", section nullAttr 4) + , ("paragraph*", section ("",["unnumbered"],[]) 4) + , ("subparagraph", section nullAttr 5) + , ("subparagraph*", section ("",["unnumbered"],[]) 5) -- beamer slides - , ("frametitle", section False nullAttr 3) - , ("framesubtitle", section False nullAttr 4) + , ("frametitle", section nullAttr 3) + , ("framesubtitle", section nullAttr 4) -- letters , ("opening", (para . trimInlines) <$> (skipopts *> tok)) , ("closing", skipopts *> closing) @@ -2152,6 +1745,18 @@ blockCommands = M.fromList -- LaTeX colors , ("textcolor", coloredBlock "color") , ("colorbox", coloredBlock "background-color") + -- csquotes + , ("blockquote", blockquote False Nothing) + , ("blockcquote", blockquote True Nothing) + , ("foreignblockquote", braced >>= blockquote False . Just . untokenize) + , ("foreignblockcquote", braced >>= blockquote True . Just . untokenize) + , ("hyphenblockquote", braced >>= blockquote False . Just . untokenize) + , ("hyphenblockcquote", braced >>= blockquote True . Just . untokenize) + -- include + , ("include", include "include") + , ("input", include "input") + , ("subfile", include "subfile") + , ("usepackage", include "usepackage") ] @@ -2192,6 +1797,7 @@ environments = M.fromList , ("minted", minted) , ("obeylines", obeylines) , ("tikzpicture", rawVerbEnv "tikzpicture") + , ("lilypond", rawVerbEnv "lilypond") -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) @@ -2234,7 +1840,7 @@ rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{tikzpicture}" ++ toksToString raw + let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -2248,7 +1854,20 @@ verbEnv name = withVerbatimMode $ do skipopts optional blankline res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ toksToString res + return $ T.unpack + $ stripTrailingNewline + $ untokenize + $ res + +-- Strip single final newline and any spaces following it. +-- Input is unchanged if it doesn't end with newline + +-- optional spaces. +stripTrailingNewline :: Text -> Text +stripTrailingNewline t = + let (b, e) = T.breakOnEnd "\n" t + in if T.all (== ' ') e + then T.dropEnd 1 b + else t fancyverbEnv :: PandocMonad m => Text -> LP m Blocks fancyverbEnv name = do @@ -2303,12 +1922,43 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go - where go (Image attr alt (src,tit)) + where go (Image attr@(_, cls, kvs) alt (src,tit)) | not ("fig:" `isPrefixOf` tit) = do - mbcapt <- sCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) - Nothing -> Image attr alt (src,tit) + (mbcapt, mblab) <- sCaption <$> getState + let (alt', tit') = case mbcapt of + Just ils -> (toList ils, "fig:" ++ tit) + Nothing -> (alt, tit) + attr' = case mblab of + Just lab -> (lab, cls, kvs) + Nothing -> attr + case attr' of + ("", _, _) -> return () + (ident, _, _) -> do + st <- getState + let chapnum = + case (sHasChapters st, sLastHeaderNum st) of + (True, DottedNum (n:_)) -> Just n + _ -> Nothing + let num = case sLastFigureNum st of + DottedNum [m,n] -> + case chapnum of + Just m' | m' == m -> DottedNum [m, n+1] + | otherwise -> DottedNum [m', 1] + Nothing -> DottedNum [1] + -- shouldn't happen + DottedNum [n] -> + case chapnum of + Just m -> DottedNum [m, 1] + Nothing -> DottedNum [n + 1] + _ -> + case chapnum of + Just n -> DottedNum [n, 1] + Nothing -> DottedNum [1] + setState $ + st{ sLastFigureNum = num + , sLabels = M.insert ident + [Str (renderDottedNum num)] (sLabels st) } + return $ Image attr' alt' (src, tit') go x = return x coloredBlock :: PandocMonad m => String -> LP m Blocks @@ -2321,7 +1971,8 @@ coloredBlock stylename = try $ do graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do - ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + ps <- map toksToString <$> + (bgroup *> spaces *> manyTill (braced <* spaces) egroup) getResourcePath >>= setResourcePath . (++ ps) return mempty @@ -2579,7 +2230,7 @@ simpTable envname hasWidthParameter = try $ do addTableCaption :: PandocMonad m => Blocks -> LP m Blocks addTableCaption = walkM go where go (Table c als ws hs rs) = do - mbcapt <- sCaption <$> getState + (mbcapt, _) <- sCaption <$> getState return $ case mbcapt of Just ils -> Table (toList ils) als ws hs rs Nothing -> Table c als ws hs rs @@ -2590,7 +2241,6 @@ block :: PandocMonad m => LP m Blocks block = do res <- (mempty <$ spaces1) <|> environment - <|> include <|> macroDef <|> blockCommand <|> paragraph @@ -2613,137 +2263,3 @@ setDefaultLanguage = do setTranslations l updateState $ setMeta "lang" $ str (renderLang l) return mempty - -polyglossiaLangToBCP47 :: M.Map String (String -> Lang) -polyglossiaLangToBCP47 = M.fromList - [ ("arabic", \o -> case filter (/=' ') o of - "locale=algeria" -> Lang "ar" "" "DZ" [] - "locale=mashriq" -> Lang "ar" "" "SY" [] - "locale=libya" -> Lang "ar" "" "LY" [] - "locale=morocco" -> Lang "ar" "" "MA" [] - "locale=mauritania" -> Lang "ar" "" "MR" [] - "locale=tunisia" -> Lang "ar" "" "TN" [] - _ -> Lang "ar" "" "" []) - , ("german", \o -> case filter (/=' ') o of - "spelling=old" -> Lang "de" "" "DE" ["1901"] - "variant=austrian,spelling=old" - -> Lang "de" "" "AT" ["1901"] - "variant=austrian" -> Lang "de" "" "AT" [] - "variant=swiss,spelling=old" - -> Lang "de" "" "CH" ["1901"] - "variant=swiss" -> Lang "de" "" "CH" [] - _ -> Lang "de" "" "" []) - , ("lsorbian", \_ -> Lang "dsb" "" "" []) - , ("greek", \o -> case filter (/=' ') o of - "variant=poly" -> Lang "el" "" "polyton" [] - "variant=ancient" -> Lang "grc" "" "" [] - _ -> Lang "el" "" "" []) - , ("english", \o -> case filter (/=' ') o of - "variant=australian" -> Lang "en" "" "AU" [] - "variant=canadian" -> Lang "en" "" "CA" [] - "variant=british" -> Lang "en" "" "GB" [] - "variant=newzealand" -> Lang "en" "" "NZ" [] - "variant=american" -> Lang "en" "" "US" [] - _ -> Lang "en" "" "" []) - , ("usorbian", \_ -> Lang "hsb" "" "" []) - , ("latin", \o -> case filter (/=' ') o of - "variant=classic" -> Lang "la" "" "" ["x-classic"] - _ -> Lang "la" "" "" []) - , ("slovenian", \_ -> Lang "sl" "" "" []) - , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) - , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) - , ("afrikaans", \_ -> Lang "af" "" "" []) - , ("amharic", \_ -> Lang "am" "" "" []) - , ("assamese", \_ -> Lang "as" "" "" []) - , ("asturian", \_ -> Lang "ast" "" "" []) - , ("bulgarian", \_ -> Lang "bg" "" "" []) - , ("bengali", \_ -> Lang "bn" "" "" []) - , ("tibetan", \_ -> Lang "bo" "" "" []) - , ("breton", \_ -> Lang "br" "" "" []) - , ("catalan", \_ -> Lang "ca" "" "" []) - , ("welsh", \_ -> Lang "cy" "" "" []) - , ("czech", \_ -> Lang "cs" "" "" []) - , ("coptic", \_ -> Lang "cop" "" "" []) - , ("danish", \_ -> Lang "da" "" "" []) - , ("divehi", \_ -> Lang "dv" "" "" []) - , ("esperanto", \_ -> Lang "eo" "" "" []) - , ("spanish", \_ -> Lang "es" "" "" []) - , ("estonian", \_ -> Lang "et" "" "" []) - , ("basque", \_ -> Lang "eu" "" "" []) - , ("farsi", \_ -> Lang "fa" "" "" []) - , ("finnish", \_ -> Lang "fi" "" "" []) - , ("french", \_ -> Lang "fr" "" "" []) - , ("friulan", \_ -> Lang "fur" "" "" []) - , ("irish", \_ -> Lang "ga" "" "" []) - , ("scottish", \_ -> Lang "gd" "" "" []) - , ("ethiopic", \_ -> Lang "gez" "" "" []) - , ("galician", \_ -> Lang "gl" "" "" []) - , ("hebrew", \_ -> Lang "he" "" "" []) - , ("hindi", \_ -> Lang "hi" "" "" []) - , ("croatian", \_ -> Lang "hr" "" "" []) - , ("magyar", \_ -> Lang "hu" "" "" []) - , ("armenian", \_ -> Lang "hy" "" "" []) - , ("interlingua", \_ -> Lang "ia" "" "" []) - , ("indonesian", \_ -> Lang "id" "" "" []) - , ("icelandic", \_ -> Lang "is" "" "" []) - , ("italian", \_ -> Lang "it" "" "" []) - , ("japanese", \_ -> Lang "jp" "" "" []) - , ("khmer", \_ -> Lang "km" "" "" []) - , ("kurmanji", \_ -> Lang "kmr" "" "" []) - , ("kannada", \_ -> Lang "kn" "" "" []) - , ("korean", \_ -> Lang "ko" "" "" []) - , ("lao", \_ -> Lang "lo" "" "" []) - , ("lithuanian", \_ -> Lang "lt" "" "" []) - , ("latvian", \_ -> Lang "lv" "" "" []) - , ("malayalam", \_ -> Lang "ml" "" "" []) - , ("mongolian", \_ -> Lang "mn" "" "" []) - , ("marathi", \_ -> Lang "mr" "" "" []) - , ("dutch", \_ -> Lang "nl" "" "" []) - , ("nynorsk", \_ -> Lang "nn" "" "" []) - , ("norsk", \_ -> Lang "no" "" "" []) - , ("nko", \_ -> Lang "nqo" "" "" []) - , ("occitan", \_ -> Lang "oc" "" "" []) - , ("panjabi", \_ -> Lang "pa" "" "" []) - , ("polish", \_ -> Lang "pl" "" "" []) - , ("piedmontese", \_ -> Lang "pms" "" "" []) - , ("portuguese", \_ -> Lang "pt" "" "" []) - , ("romansh", \_ -> Lang "rm" "" "" []) - , ("romanian", \_ -> Lang "ro" "" "" []) - , ("russian", \_ -> Lang "ru" "" "" []) - , ("sanskrit", \_ -> Lang "sa" "" "" []) - , ("samin", \_ -> Lang "se" "" "" []) - , ("slovak", \_ -> Lang "sk" "" "" []) - , ("albanian", \_ -> Lang "sq" "" "" []) - , ("serbian", \_ -> Lang "sr" "" "" []) - , ("swedish", \_ -> Lang "sv" "" "" []) - , ("syriac", \_ -> Lang "syr" "" "" []) - , ("tamil", \_ -> Lang "ta" "" "" []) - , ("telugu", \_ -> Lang "te" "" "" []) - , ("thai", \_ -> Lang "th" "" "" []) - , ("turkmen", \_ -> Lang "tk" "" "" []) - , ("turkish", \_ -> Lang "tr" "" "" []) - , ("ukrainian", \_ -> Lang "uk" "" "" []) - , ("urdu", \_ -> Lang "ur" "" "" []) - , ("vietnamese", \_ -> Lang "vi" "" "" []) - ] - -babelLangToBCP47 :: String -> Maybe Lang -babelLangToBCP47 s = - case s of - "austrian" -> Just $ Lang "de" "" "AT" ["1901"] - "naustrian" -> Just $ Lang "de" "" "AT" [] - "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] - "nswissgerman" -> Just $ Lang "de" "" "CH" [] - "german" -> Just $ Lang "de" "" "DE" ["1901"] - "ngerman" -> Just $ Lang "de" "" "DE" [] - "lowersorbian" -> Just $ Lang "dsb" "" "" [] - "uppersorbian" -> Just $ Lang "hsb" "" "" [] - "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] - "slovene" -> Just $ Lang "sl" "" "" [] - "australian" -> Just $ Lang "en" "" "AU" [] - "canadian" -> Just $ Lang "en" "" "CA" [] - "british" -> Just $ Lang "en" "" "GB" [] - "newzealand" -> Just $ Lang "en" "" "NZ" [] - "american" -> Just $ Lang "en" "" "US" [] - "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] - _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs new file mode 100644 index 000000000..9b57c98fd --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -0,0 +1,173 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Lang + Copyright : Copyright (C) 2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for parsing polyglossia and babel language specifiers to +BCP47 'Lang'. +-} +module Text.Pandoc.Readers.LaTeX.Lang + ( polyglossiaLangToBCP47 + , babelLangToBCP47 + ) +where +import Prelude +import qualified Data.Map as M +import Text.Pandoc.BCP47 (Lang(..)) + +polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 = M.fromList + [ ("arabic", \o -> case filter (/=' ') o of + "locale=algeria" -> Lang "ar" "" "DZ" [] + "locale=mashriq" -> Lang "ar" "" "SY" [] + "locale=libya" -> Lang "ar" "" "LY" [] + "locale=morocco" -> Lang "ar" "" "MA" [] + "locale=mauritania" -> Lang "ar" "" "MR" [] + "locale=tunisia" -> Lang "ar" "" "TN" [] + _ -> Lang "ar" "" "" []) + , ("german", \o -> case filter (/=' ') o of + "spelling=old" -> Lang "de" "" "DE" ["1901"] + "variant=austrian,spelling=old" + -> Lang "de" "" "AT" ["1901"] + "variant=austrian" -> Lang "de" "" "AT" [] + "variant=swiss,spelling=old" + -> Lang "de" "" "CH" ["1901"] + "variant=swiss" -> Lang "de" "" "CH" [] + _ -> Lang "de" "" "" []) + , ("lsorbian", \_ -> Lang "dsb" "" "" []) + , ("greek", \o -> case filter (/=' ') o of + "variant=poly" -> Lang "el" "" "polyton" [] + "variant=ancient" -> Lang "grc" "" "" [] + _ -> Lang "el" "" "" []) + , ("english", \o -> case filter (/=' ') o of + "variant=australian" -> Lang "en" "" "AU" [] + "variant=canadian" -> Lang "en" "" "CA" [] + "variant=british" -> Lang "en" "" "GB" [] + "variant=newzealand" -> Lang "en" "" "NZ" [] + "variant=american" -> Lang "en" "" "US" [] + _ -> Lang "en" "" "" []) + , ("usorbian", \_ -> Lang "hsb" "" "" []) + , ("latin", \o -> case filter (/=' ') o of + "variant=classic" -> Lang "la" "" "" ["x-classic"] + _ -> Lang "la" "" "" []) + , ("slovenian", \_ -> Lang "sl" "" "" []) + , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) + , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) + , ("afrikaans", \_ -> Lang "af" "" "" []) + , ("amharic", \_ -> Lang "am" "" "" []) + , ("assamese", \_ -> Lang "as" "" "" []) + , ("asturian", \_ -> Lang "ast" "" "" []) + , ("bulgarian", \_ -> Lang "bg" "" "" []) + , ("bengali", \_ -> Lang "bn" "" "" []) + , ("tibetan", \_ -> Lang "bo" "" "" []) + , ("breton", \_ -> Lang "br" "" "" []) + , ("catalan", \_ -> Lang "ca" "" "" []) + , ("welsh", \_ -> Lang "cy" "" "" []) + , ("czech", \_ -> Lang "cs" "" "" []) + , ("coptic", \_ -> Lang "cop" "" "" []) + , ("danish", \_ -> Lang "da" "" "" []) + , ("divehi", \_ -> Lang "dv" "" "" []) + , ("esperanto", \_ -> Lang "eo" "" "" []) + , ("spanish", \_ -> Lang "es" "" "" []) + , ("estonian", \_ -> Lang "et" "" "" []) + , ("basque", \_ -> Lang "eu" "" "" []) + , ("farsi", \_ -> Lang "fa" "" "" []) + , ("finnish", \_ -> Lang "fi" "" "" []) + , ("french", \_ -> Lang "fr" "" "" []) + , ("friulan", \_ -> Lang "fur" "" "" []) + , ("irish", \_ -> Lang "ga" "" "" []) + , ("scottish", \_ -> Lang "gd" "" "" []) + , ("ethiopic", \_ -> Lang "gez" "" "" []) + , ("galician", \_ -> Lang "gl" "" "" []) + , ("hebrew", \_ -> Lang "he" "" "" []) + , ("hindi", \_ -> Lang "hi" "" "" []) + , ("croatian", \_ -> Lang "hr" "" "" []) + , ("magyar", \_ -> Lang "hu" "" "" []) + , ("armenian", \_ -> Lang "hy" "" "" []) + , ("interlingua", \_ -> Lang "ia" "" "" []) + , ("indonesian", \_ -> Lang "id" "" "" []) + , ("icelandic", \_ -> Lang "is" "" "" []) + , ("italian", \_ -> Lang "it" "" "" []) + , ("japanese", \_ -> Lang "jp" "" "" []) + , ("khmer", \_ -> Lang "km" "" "" []) + , ("kurmanji", \_ -> Lang "kmr" "" "" []) + , ("kannada", \_ -> Lang "kn" "" "" []) + , ("korean", \_ -> Lang "ko" "" "" []) + , ("lao", \_ -> Lang "lo" "" "" []) + , ("lithuanian", \_ -> Lang "lt" "" "" []) + , ("latvian", \_ -> Lang "lv" "" "" []) + , ("malayalam", \_ -> Lang "ml" "" "" []) + , ("mongolian", \_ -> Lang "mn" "" "" []) + , ("marathi", \_ -> Lang "mr" "" "" []) + , ("dutch", \_ -> Lang "nl" "" "" []) + , ("nynorsk", \_ -> Lang "nn" "" "" []) + , ("norsk", \_ -> Lang "no" "" "" []) + , ("nko", \_ -> Lang "nqo" "" "" []) + , ("occitan", \_ -> Lang "oc" "" "" []) + , ("panjabi", \_ -> Lang "pa" "" "" []) + , ("polish", \_ -> Lang "pl" "" "" []) + , ("piedmontese", \_ -> Lang "pms" "" "" []) + , ("portuguese", \_ -> Lang "pt" "" "" []) + , ("romansh", \_ -> Lang "rm" "" "" []) + , ("romanian", \_ -> Lang "ro" "" "" []) + , ("russian", \_ -> Lang "ru" "" "" []) + , ("sanskrit", \_ -> Lang "sa" "" "" []) + , ("samin", \_ -> Lang "se" "" "" []) + , ("slovak", \_ -> Lang "sk" "" "" []) + , ("albanian", \_ -> Lang "sq" "" "" []) + , ("serbian", \_ -> Lang "sr" "" "" []) + , ("swedish", \_ -> Lang "sv" "" "" []) + , ("syriac", \_ -> Lang "syr" "" "" []) + , ("tamil", \_ -> Lang "ta" "" "" []) + , ("telugu", \_ -> Lang "te" "" "" []) + , ("thai", \_ -> Lang "th" "" "" []) + , ("turkmen", \_ -> Lang "tk" "" "" []) + , ("turkish", \_ -> Lang "tr" "" "" []) + , ("ukrainian", \_ -> Lang "uk" "" "" []) + , ("urdu", \_ -> Lang "ur" "" "" []) + , ("vietnamese", \_ -> Lang "vi" "" "" []) + ] + +babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 s = + case s of + "austrian" -> Just $ Lang "de" "" "AT" ["1901"] + "naustrian" -> Just $ Lang "de" "" "AT" [] + "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] + "nswissgerman" -> Just $ Lang "de" "" "CH" [] + "german" -> Just $ Lang "de" "" "DE" ["1901"] + "ngerman" -> Just $ Lang "de" "" "DE" [] + "lowersorbian" -> Just $ Lang "dsb" "" "" [] + "uppersorbian" -> Just $ Lang "hsb" "" "" [] + "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] + "slovene" -> Just $ Lang "sl" "" "" [] + "australian" -> Just $ Lang "en" "" "AU" [] + "canadian" -> Just $ Lang "en" "" "CA" [] + "british" -> Just $ Lang "en" "" "GB" [] + "newzealand" -> Just $ Lang "en" "" "NZ" [] + "american" -> Just $ Lang "en" "" "US" [] + "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs new file mode 100644 index 000000000..9256217fe --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -0,0 +1,668 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Parsing + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +General parsing types and functions for LaTeX. +-} +module Text.Pandoc.Readers.LaTeX.Parsing + ( DottedNum(..) + , renderDottedNum + , incrementDottedNum + , LaTeXState(..) + , defaultLaTeXState + , LP + , withVerbatimMode + , rawLaTeXParser + , applyMacros + , tokenize + , untokenize + , untoken + , totoks + , toksToString + , satisfyTok + , doMacros + , setpos + , anyControlSeq + , anySymbol + , isNewlineTok + , isWordTok + , isArgTok + , spaces + , spaces1 + , tokTypeIn + , controlSeq + , symbol + , symbolIn + , sp + , whitespace + , newlineTok + , comment + , anyTok + , singleChar + , specialChars + , endline + , blankline + , primEscape + , bgroup + , egroup + , grouped + , braced + , braced' + , bracedUrl + , bracedOrToken + , bracketed + , bracketedToks + , parenWrapped + , dimenarg + , ignore + , withRaw + ) where + +import Prelude +import Control.Applicative (many, (<|>)) +import Control.Monad +import Control.Monad.Except (throwError) +import Control.Monad.Trans (lift) +import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord) +import Data.Default +import Data.List (intercalate) +import qualified Data.Map as M +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Error (PandocError (PandocMacroLoop)) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), + ArgSpec (..), Tok (..), TokType (..)) +import Text.Pandoc.Shared +import Text.Parsec.Pos + +newtype DottedNum = DottedNum [Int] + deriving (Show) + +renderDottedNum :: DottedNum -> String +renderDottedNum (DottedNum xs) = + intercalate "." (map show xs) + +incrementDottedNum :: Int -> DottedNum -> DottedNum +incrementDottedNum level (DottedNum ns) = DottedNum $ + case reverse (take level (ns ++ repeat 0)) of + (x:xs) -> reverse (x+1 : xs) + [] -> [] -- shouldn't happen + +data LaTeXState = LaTeXState{ sOptions :: ReaderOptions + , sMeta :: Meta + , sQuoteContext :: QuoteContext + , sMacros :: M.Map Text Macro + , sContainers :: [String] + , sHeaders :: M.Map Inlines String + , sLogMessages :: [LogMessage] + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: (Maybe Inlines, Maybe String) + , sInListItem :: Bool + , sInTableCell :: Bool + , sLastHeaderNum :: DottedNum + , sLastFigureNum :: DottedNum + , sLabels :: M.Map String [Inline] + , sHasChapters :: Bool + , sToggles :: M.Map String Bool + } + deriving Show + +defaultLaTeXState :: LaTeXState +defaultLaTeXState = LaTeXState{ sOptions = def + , sMeta = nullMeta + , sQuoteContext = NoQuote + , sMacros = M.empty + , sContainers = [] + , sHeaders = M.empty + , sLogMessages = [] + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = (Nothing, Nothing) + , sInListItem = False + , sInTableCell = False + , sLastHeaderNum = DottedNum [] + , sLastFigureNum = DottedNum [] + , sLabels = M.empty + , sHasChapters = False + , sToggles = M.empty + } + +instance PandocMonad m => HasQuoteContext LaTeXState m where + getQuoteContext = sQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = sQuoteContext oldState + setState oldState { sQuoteContext = context } + result <- parser + newState <- getState + setState newState { sQuoteContext = oldQuoteContext } + return result + +instance HasLogMessages LaTeXState where + addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } + getLogMessages st = reverse $ sLogMessages st + +instance HasIdentifierList LaTeXState where + extractIdentifierList = sIdentifiers + updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } + +instance HasIncludeFiles LaTeXState where + getIncludeFiles = sContainers + addIncludeFile f s = s{ sContainers = f : sContainers s } + dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } + +instance HasHeaderMap LaTeXState where + extractHeaderMap = sHeaders + updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } + +instance HasMacros LaTeXState where + extractMacros st = sMacros st + updateMacros f st = st{ sMacros = f (sMacros st) } + +instance HasReaderOptions LaTeXState where + extractReaderOptions = sOptions + +instance HasMeta LaTeXState where + setMeta field val st = + st{ sMeta = setMeta field val $ sMeta st } + deleteMeta field st = + st{ sMeta = deleteMeta field $ sMeta st } + +instance Default LaTeXState where + def = defaultLaTeXState + +type LP m = ParserT [Tok] LaTeXState m + +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => Bool -> LP m a -> LP m a -> ParserT String s m (a, String) +rawLaTeXParser retokenize parser valParser = do + inp <- getInput + let toks = tokenize "source" $ T.pack inp + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate } + let lstate' = lstate { sMacros = extractMacros pstate } + let rawparser = (,) <$> withRaw valParser <*> getState + res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks + case res' of + Left _ -> mzero + Right toks' -> do + res <- lift $ runParserT (do when retokenize $ do + -- retokenize, applying macros + doMacros 0 + ts <- many (satisfyTok (const True)) + setInput ts + rawparser) + lstate' "chunk" toks' + case res of + Left _ -> mzero + Right ((val, raw), st) -> do + updateState (updateMacros (sMacros st <>)) + _ <- takeP (T.length (untokenize toks')) + return (val, T.unpack (untokenize raw)) + +applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => String -> ParserT String s m String +applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> + do let retokenize = doMacros 0 *> + (toksToString <$> many (satisfyTok (const True))) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + case res of + Left e -> fail (show e) + Right s' -> return s' +tokenize :: SourceName -> Text -> [Tok] +tokenize sourcename = totoks (initialPos sourcename) + +totoks :: SourcePos -> Text -> [Tok] +totoks pos t = + case T.uncons t of + Nothing -> [] + Just (c, rest) + | c == '\n' -> + Tok pos Newline "\n" + : totoks (setSourceColumn (incSourceLine pos 1) 1) rest + | isSpaceOrTab c -> + let (sps, rest') = T.span isSpaceOrTab t + in Tok pos Spaces sps + : totoks (incSourceColumn pos (T.length sps)) + rest' + | isAlphaNum c -> + let (ws, rest') = T.span isAlphaNum t + in Tok pos Word ws + : totoks (incSourceColumn pos (T.length ws)) rest' + | c == '%' -> + let (cs, rest') = T.break (== '\n') rest + in Tok pos Comment ("%" <> cs) + : totoks (incSourceColumn pos (1 + T.length cs)) rest' + | c == '\\' -> + case T.uncons rest of + Nothing -> [Tok pos (CtrlSeq " ") "\\"] + Just (d, rest') + | isLetterOrAt d -> + -- \makeatletter is common in macro defs; + -- ideally we should make tokenization sensitive + -- to \makeatletter and \makeatother, but this is + -- probably best for now + let (ws, rest'') = T.span isLetterOrAt rest + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (incSourceColumn pos + (1 + T.length ws + T.length ss)) rest''' + | isSpaceOrTab d || d == '\n' -> + let (w1, r1) = T.span isSpaceOrTab rest + (w2, (w3, r3)) = case T.uncons r1 of + Just ('\n', r2) + -> (T.pack "\n", + T.span isSpaceOrTab r2) + _ -> (mempty, (mempty, r1)) + ws = "\\" <> w1 <> w2 <> w3 + in case T.uncons r3 of + Just ('\n', _) -> + Tok pos (CtrlSeq " ") ("\\" <> w1) + : totoks (incSourceColumn pos (T.length ws)) + r1 + _ -> + Tok pos (CtrlSeq " ") ws + : totoks (incSourceColumn pos (T.length ws)) + r3 + | otherwise -> + Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d]) + : totoks (incSourceColumn pos 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok pos (Arg i) ("#" <> t1) + : totoks (incSourceColumn pos (1 + T.length t1)) t2 + Nothing -> + Tok pos Symbol "#" + : totoks (incSourceColumn pos 1) t2 + | c == '^' -> + case T.uncons rest of + Just ('^', rest') -> + case T.uncons rest' of + Just (d, rest'') + | isLowerHex d -> + case T.uncons rest'' of + Just (e, rest''') | isLowerHex e -> + Tok pos Esc2 (T.pack ['^','^',d,e]) + : totoks (incSourceColumn pos 4) rest''' + _ -> + Tok pos Esc1 (T.pack ['^','^',d]) + : totoks (incSourceColumn pos 3) rest'' + | d < '\128' -> + Tok pos Esc1 (T.pack ['^','^',d]) + : totoks (incSourceColumn pos 3) rest'' + _ -> Tok pos Symbol "^" : + Tok (incSourceColumn pos 1) Symbol "^" : + totoks (incSourceColumn pos 2) rest' + _ -> Tok pos Symbol "^" + : totoks (incSourceColumn pos 1) rest + | otherwise -> + Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest + +isSpaceOrTab :: Char -> Bool +isSpaceOrTab ' ' = True +isSpaceOrTab '\t' = True +isSpaceOrTab _ = False + +isLetterOrAt :: Char -> Bool +isLetterOrAt '@' = True +isLetterOrAt c = isLetter c + +isLowerHex :: Char -> Bool +isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' + +untokenize :: [Tok] -> Text +untokenize = mconcat . map untoken + +untoken :: Tok -> Text +untoken (Tok _ _ t) = t + +toksToString :: [Tok] -> String +toksToString = T.unpack . untokenize + +satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok +satisfyTok f = + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res + where matcher t | f t = Just t + | otherwise = Nothing + updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos + updatePos _spos _ (Tok pos _ _ : _) = pos + updatePos spos _ [] = incSourceColumn spos 1 + +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + verbatimMode <- sVerbatimMode <$> getState + unless verbatimMode $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok _ (CtrlSeq "expandafter") _ : t : ts + -> do setInput ts + doMacros n + getInput >>= setInput . combineTok t + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) + | T.all isLetterOrAt w = + Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts + where (x1, x2) = T.break isSpaceOrTab x + combineTok t ts = t:ts + handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro expansionPoint argspecs optarg newtoks) -> do + setInput ts + let matchTok (Tok _ toktype txt) = + satisfyTok (\(Tok _ toktype' txt') -> + toktype == toktype' && + txt == txt') + let matchPattern toks = try $ mapM_ matchTok toks + let getargs argmap [] = return argmap + getargs argmap (Pattern toks : rest) = try $ do + matchPattern toks + getargs argmap rest + getargs argmap (ArgNum i : Pattern toks : rest) = + try $ do + x <- mconcat <$> manyTill + (braced <|> ((:[]) <$> anyTok)) + (matchPattern toks) + getargs (M.insert i x argmap) rest + getargs argmap (ArgNum i : rest) = do + x <- try $ spaces >> bracedOrToken + getargs (M.insert i x argmap) rest + args <- case optarg of + Nothing -> getargs M.empty argspecs + Just o -> do + x <- option o bracketedToks + getargs (M.singleton 1 x) argspecs + -- first boolean param is true if we're tokenizing + -- an argument (in which case we don't want to + -- expand #1 etc.) + let addTok False (Tok _ (Arg i) _) acc = + case M.lookup i args of + Nothing -> mzero + Just xs -> foldr (addTok True) acc xs + -- see #4007 + addTok _ (Tok _ (CtrlSeq x) txt) + acc@(Tok _ Word _ : _) + | not (T.null txt) && + isLetter (T.last txt) = + Tok spos (CtrlSeq x) (txt <> " ") : acc + addTok _ t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr (addTok False) ts' newtoks + case expansionPoint of + ExpandWhenUsed -> + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) + ExpandWhenDefined -> return () + + +setpos :: SourcePos -> Tok -> Tok +setpos spos (Tok _ tt txt) = Tok spos tt txt + +anyControlSeq :: PandocMonad m => LP m Tok +anyControlSeq = satisfyTok isCtrlSeq + +isCtrlSeq :: Tok -> Bool +isCtrlSeq (Tok _ (CtrlSeq _) _) = True +isCtrlSeq _ = False + +anySymbol :: PandocMonad m => LP m Tok +anySymbol = satisfyTok isSymbolTok + +isSymbolTok :: Tok -> Bool +isSymbolTok (Tok _ Symbol _) = True +isSymbolTok _ = False + +isWordTok :: Tok -> Bool +isWordTok (Tok _ Word _) = True +isWordTok _ = False + +isArgTok :: Tok -> Bool +isArgTok (Tok _ (Arg _) _) = True +isArgTok _ = False + +spaces :: PandocMonad m => LP m () +spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +spaces1 :: PandocMonad m => LP m () +spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +tokTypeIn :: [TokType] -> Tok -> Bool +tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes + +controlSeq :: PandocMonad m => Text -> LP m Tok +controlSeq name = satisfyTok isNamed + where isNamed (Tok _ (CtrlSeq n) _) = n == name + isNamed _ = False + +symbol :: PandocMonad m => Char -> LP m Tok +symbol c = satisfyTok isc + where isc (Tok _ Symbol d) = case T.uncons d of + Just (c',_) -> c == c' + _ -> False + isc _ = False + +symbolIn :: PandocMonad m => [Char] -> LP m Tok +symbolIn cs = satisfyTok isInCs + where isInCs (Tok _ Symbol d) = case T.uncons d of + Just (c,_) -> c `elem` cs + _ -> False + isInCs _ = False + +sp :: PandocMonad m => LP m () +sp = whitespace <|> endline + +whitespace :: PandocMonad m => LP m () +whitespace = () <$ satisfyTok isSpaceTok + +isSpaceTok :: Tok -> Bool +isSpaceTok (Tok _ Spaces _) = True +isSpaceTok _ = False + +newlineTok :: PandocMonad m => LP m () +newlineTok = () <$ satisfyTok isNewlineTok + +isNewlineTok :: Tok -> Bool +isNewlineTok (Tok _ Newline _) = True +isNewlineTok _ = False + +comment :: PandocMonad m => LP m () +comment = () <$ satisfyTok isCommentTok + +isCommentTok :: Tok -> Bool +isCommentTok (Tok _ Comment _) = True +isCommentTok _ = False + +anyTok :: PandocMonad m => LP m Tok +anyTok = satisfyTok (const True) + +singleChar :: PandocMonad m => LP m Tok +singleChar = try $ do + Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t + if T.length t > 1 + then do + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp + return $ Tok pos toktype t1 + else return $ Tok pos toktype t + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" + +endline :: PandocMonad m => LP m () +endline = try $ do + newlineTok + lookAhead anyTok + notFollowedBy blankline + +blankline :: PandocMonad m => LP m () +blankline = try $ skipMany whitespace *> newlineTok + +primEscape :: PandocMonad m => LP m Char +primEscape = do + Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) + case toktype of + Esc1 -> case T.uncons (T.drop 2 t) of + Just (c, _) + | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) + | otherwise -> return (chr (ord c + 64)) + Nothing -> fail "Empty content of Esc1" + Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Just x -> return (chr x) + Nothing -> fail $ "Could not read: " ++ T.unpack t + _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen + +bgroup :: PandocMonad m => LP m Tok +bgroup = try $ do + skipMany sp + symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + +egroup :: PandocMonad m => LP m Tok +egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" + +grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a +grouped parser = try $ do + bgroup + -- first we check for an inner 'grouped', because + -- {{a,b}} should be parsed the same as {a,b} + try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) + +braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok] +braced' getTok n = + handleEgroup <|> handleBgroup <|> handleOther + where handleEgroup = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' getTok (n - 1) + handleBgroup = do + t <- bgroup + (t:) <$> braced' getTok (n + 1) + handleOther = do + t <- getTok + (t:) <$> braced' getTok n + +braced :: PandocMonad m => LP m [Tok] +braced = bgroup *> braced' anyTok 1 + +-- URLs require special handling, because they can contain % +-- characters. So we retonenize comments as we go... +bracedUrl :: PandocMonad m => LP m [Tok] +bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1 + +-- For handling URLs, which allow literal % characters... +retokenizeComment :: PandocMonad m => LP m () +retokenizeComment = (do + Tok pos Comment txt <- satisfyTok isCommentTok + let updPos (Tok pos' toktype' txt') = + Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1)) + (sourceColumn pos)) toktype' txt' + let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt + getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++)) + <|> return () + +bracedOrToken :: PandocMonad m => LP m [Tok] +bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar)) + +bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a +bracketed parser = try $ do + symbol '[' + mconcat <$> manyTill parser (symbol ']') + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']') + +parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a +parenWrapped parser = try $ do + symbol '(' + mconcat <$> manyTill parser (symbol ')') + +dimenarg :: PandocMonad m => LP m Text +dimenarg = try $ do + ch <- option False $ True <$ symbol '=' + Tok _ _ s <- satisfyTok isWordTok + guard $ T.take 2 (T.reverse s) `elem` + ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + let num = T.take (T.length s - 2) s + guard $ T.length num > 0 + guard $ T.all isDigit num + return $ T.pack ['=' | ch] <> s + +ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore raw = do + pos <- getPosition + report $ SkippedContent raw pos + return mempty + +withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) +withRaw parser = do + inp <- getInput + result <- parser + nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok) + let raw = takeWhile (/= nxt) inp + return (result, raw) diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index fa832114b..e3a302d49 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -31,6 +31,7 @@ Types for LaTeX tokens and macros. module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) , TokType(..) , Macro(..) + , ArgSpec(..) , ExpansionPoint(..) , SourcePos ) @@ -49,5 +50,8 @@ data Tok = Tok SourcePos TokType Text data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) -data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok] +data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok] + deriving Show + +data ArgSpec = ArgNum Int | Pattern [Tok] deriving Show diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 156b2b622..d1ea7a1a5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> @@ -31,31 +32,28 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where import Prelude import Control.Monad import Control.Monad.Except (throwError) +import qualified Data.ByteString.Lazy as BS import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) -import qualified Data.HashMap.Strict as H import Data.List (intercalate, sortBy, transpose, elemIndex) import qualified Data.Map as M import Data.Maybe import Data.Ord (comparing) -import Data.Scientific (base10Exponent, coefficient) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import qualified Data.Vector as V -import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..)) -import qualified Data.Yaml as Yaml +import qualified Data.YAML as YAML import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..), report) import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options @@ -234,11 +232,9 @@ pandocTitleBlock = try $ do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } - yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block - pos <- getPosition string "---" blankline notFollowedBy blankline -- if --- is followed by a blank it's an HRULE @@ -246,52 +242,44 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> do - let alist = H.toList hashmap - mapM_ (\(k, v) -> - if ignorable k - then return () - else do - v' <- yamlToMeta v - let k' = T.unpack k - updateState $ \st -> st{ stateMeta' = - do m <- stateMeta' st - -- if there's already a value, leave it unchanged - case lookupMeta k' m of - Just _ -> return m - Nothing -> do - v'' <- v' - return $ B.setMeta (T.unpack k) v'' m} - ) alist - Right Yaml.Null -> return () + newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + -- Since `<>` is left-biased, existing values are not touched: + updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } + return mempty + +-- | Read a YAML string and convert it to pandoc metadata. +-- String scalars in the YAML are parsed as Markdown. +yamlToMeta :: PandocMonad m => BS.ByteString -> m Meta +yamlToMeta bstr = do + let parser = do + meta <- yamlBsToMeta bstr + return $ runF meta defaultParserState + parsed <- readWithM parser def "" + case parsed of + Right result -> return result + Left e -> throwError e + +yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta) +yamlBsToMeta bstr = do + pos <- getPosition + case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of + Right ((YAML.Doc (YAML.Mapping _ o)):_) -> (fmap Meta) <$> yamlMap o + Right [] -> return . return $ mempty + Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return . return $ mempty Right _ -> do - logMessage $ - CouldNotParseYamlMetadata "not an object" - pos - return () + logMessage $ + CouldNotParseYamlMetadata "not an object" + pos + return . return $ mempty Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - logMessage $ CouldNotParseYamlMetadata - problem (setSourceLine - (setSourceColumn pos - (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - _ -> logMessage $ CouldNotParseYamlMetadata - (show err') pos - return () - return mempty + logMessage $ CouldNotParseYamlMetadata + err' pos + return . return $ mempty --- ignore fields ending with _ -ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t +nodeToKey :: Monad m => YAML.Node -> m Text +nodeToKey (YAML.Scalar (YAML.SStr t)) = return t +nodeToKey (YAML.Scalar (YAML.SUnknown _ t)) = return t +nodeToKey _ = fail "Non-string key in YAML mapping" toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) @@ -312,34 +300,51 @@ toMetaValue x = -- not end in a newline, but a "block" set off with -- `|` or `>` will. -yamlToMeta :: PandocMonad m - => Yaml.Value -> MarkdownParser m (F MetaValue) -yamlToMeta (Yaml.String t) = toMetaValue t -yamlToMeta (Yaml.Number n) - -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = return $ return $ MetaString $ show - $ coefficient n * (10 ^ base10Exponent n) - | otherwise = return $ return $ MetaString $ show n -yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b -yamlToMeta (Yaml.Array xs) = do - xs' <- mapM yamlToMeta (V.toList xs) +checkBoolean :: Text -> Maybe Bool +checkBoolean t = + if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" + then Just True + else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" + then Just False + else Nothing + +yamlToMetaValue :: PandocMonad m + => YAML.Node -> MarkdownParser m (F MetaValue) +yamlToMetaValue (YAML.Scalar x) = + case x of + YAML.SStr t -> toMetaValue t + YAML.SBool b -> return $ return $ MetaBool b + YAML.SFloat d -> return $ return $ MetaString (show d) + YAML.SInt i -> return $ return $ MetaString (show i) + YAML.SUnknown _ t -> + case checkBoolean t of + Just b -> return $ return $ MetaBool b + Nothing -> toMetaValue t + YAML.SNull -> return $ return $ MetaString "" +yamlToMetaValue (YAML.Sequence _ xs) = do + xs' <- mapM yamlToMetaValue xs return $ do xs'' <- sequence xs' return $ B.toMetaValue xs'' -yamlToMeta (Yaml.Object o) = do - let alist = H.toList o - foldM (\m (k,v) -> - if ignorable k - then return m - else do - v' <- yamlToMeta v - return $ do - MetaMap m' <- m - v'' <- v' - return (MetaMap $ M.insert (T.unpack k) v'' m')) - (return $ MetaMap M.empty) - alist -yamlToMeta _ = return $ return $ MetaString "" +yamlToMetaValue (YAML.Mapping _ o) = fmap B.toMetaValue <$> yamlMap o +yamlToMetaValue _ = return $ return $ MetaString "" + +yamlMap :: PandocMonad m + => M.Map YAML.Node YAML.Node + -> MarkdownParser m (F (M.Map String MetaValue)) +yamlMap o = do + kvs <- forM (M.toList o) $ \(key, v) -> do + k <- nodeToKey key + return (k, v) + let kvs' = filter (not . ignorable . fst) kvs + (fmap M.fromList . sequence) <$> mapM toMeta kvs' + where + ignorable t = (T.pack "_") `T.isSuffixOf` t + toMeta (k, v) = do + fv <- yamlToMetaValue v + return $ do + v' <- fv + return (T.unpack k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -966,7 +971,9 @@ orderedList = try $ do <|> return (style == Example) items <- fmap sequence $ many1 $ listItem fourSpaceRule (orderedListStart (Just (style, delim))) - start' <- (start <$ guardEnabled Ext_startnum) <|> return 1 + start' <- if style == Example + then return start + else (start <$ guardEnabled Ext_startnum) <|> return 1 return $ B.orderedListWith (start', style, delim) <$> fmap compactify items bulletList :: PandocMonad m => MarkdownParser m (F Blocks) @@ -1142,10 +1149,9 @@ rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex lookAhead $ try $ char '\\' >> letter - result <- (B.rawBlock "context" . trim . concat <$> - many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand) - <*> spnl')) - <|> (B.rawBlock "latex" . trim . concat <$> + result <- (B.rawBlock "tex" . trim . concat <$> + many1 ((++) <$> rawConTeXtEnvironment <*> spnl')) + <|> (B.rawBlock "tex" . trim . concat <$> many1 ((++) <$> rawLaTeXBlock <*> spnl')) return $ case B.toList result of [RawBlock _ cs] @@ -1153,9 +1159,6 @@ rawTeXBlock = do -- don't create a raw block for suppressed macro defs _ -> return result -conTeXtCommand :: PandocMonad m => MarkdownParser m String -conTeXtCommand = oneOfStrings ["\\placeformula"] - rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag @@ -1591,7 +1594,7 @@ code = try $ do starts <- many1 (char '`') skipSpaces result <- (trim . concat) <$> - many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + manyTill (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) @@ -1877,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks - (orig, src) <- uri <|> emailAddress + (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ return $ B.link src "" (B.str orig) + return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink = try $ do getState >>= guard . stateAllowLinks char '<' - (orig, src) <- uri <|> emailAddress + (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) -- in rare cases, something may remain after the uri parser -- is finished, because the uri parser tries to avoid parsing -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. extra <- fromEntities <$> manyTill nonspaceChar (char '>') - attr <- option nullAttr $ try $ + attr <- option ("", [cls], []) $ try $ guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) + return $ return $ B.linkWith attr (src ++ escapeURI extra) "" + (B.str $ orig ++ extra) image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do @@ -2037,9 +2041,9 @@ emoji = try $ do char ':' emojikey <- many1 (oneOf emojiChars) char ':' - case M.lookup emojikey emojis of - Just s -> return (return (B.str s)) - Nothing -> mzero + case emojiToInline emojikey of + Just i -> return (return $ B.singleton i) + Nothing -> mzero -- Citations diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index fe6b3698c..134598c07 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -35,34 +35,32 @@ TODO: - Page breaks (five "*") - Org tables - table.el tables -- Images with attributes (floating and width) - <cite> tag -} module Text.Pandoc.Readers.Muse (readMuse) where import Prelude import Control.Monad +import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor -import Data.Char (isLetter) +import Data.Char (isAlphaNum) import Data.Default -import Data.List (stripPrefix, intercalate) +import Data.List (intercalate) import Data.List.Split (splitOn) import qualified Data.Map as M import qualified Data.Set as Set -import Data.Maybe (fromMaybe, isNothing) +import Data.Maybe (fromMaybe, isNothing, maybeToList) import Data.Text (Text, unpack) -import System.FilePath (takeExtension) -import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (F) -import Text.Pandoc.Readers.HTML (htmlTag) -import Text.Pandoc.Shared (crFilter, underlineSpan) +import Text.Pandoc.Parsing hiding (F, enclosed) +import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -70,7 +68,8 @@ readMuse :: PandocMonad m -> Text -> m Pandoc readMuse opts s = do - res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s)) + let input = crFilter s + res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def case res of Left e -> throwError e Right d -> return d @@ -84,7 +83,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] , museNotes :: M.Map String (SourcePos, F Blocks) - , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links , museInPara :: Bool -- ^ True when looking for a paragraph terminator } @@ -96,11 +94,17 @@ instance Default MuseState where , museLastStrPos = Nothing , museLogMessages = [] , museNotes = M.empty - , museInLink = False , museInPara = False } -type MuseParser = ParserT String MuseState +data MuseEnv = + MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links + } + +instance Default MuseEnv where + def = MuseEnv { museInLink = False } + +type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) instance HasReaderOptions MuseState where extractReaderOptions = museOptions @@ -125,11 +129,9 @@ instance HasLogMessages MuseState where parseMuse :: PandocMonad m => MuseParser m Pandoc parseMuse = do many directive - blocks <- parseBlocks + blocks <- (:) <$> parseBlocks <*> many parseSection st <- getState - let doc = runF (do Pandoc _ bs <- B.doc <$> blocks - meta <- museMeta st - return $ Pandoc meta bs) st + let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st reportLogMessages return doc @@ -144,9 +146,8 @@ commonPrefix (x:xs) (y:ys) -- | Trim up to one newline from the beginning of the string. lchop :: String -> String -lchop s = case s of - '\n':ss -> ss - _ -> s +lchop ('\n':xs) = xs +lchop s = s -- | Trim up to one newline from the end of the string. rchop :: String -> String @@ -165,12 +166,19 @@ atStart p = do guard $ museLastStrPos st /= Just pos p +firstColumn :: PandocMonad m => MuseParser m () +firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) + -- * Parsers -- | Parse end-of-line, which can be either a newline or end-of-file. eol :: Stream s m Char => ParserT s st m () eol = void newline <|> eof +getIndent :: PandocMonad m + => MuseParser m Int +getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition + someUntil :: (Stream s m t) => ParserT s u m a -> ParserT s u m b @@ -179,28 +187,21 @@ someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end -- ** HTML parsers --- | Parse HTML tag, returning its attributes and literal contents. -htmlElement :: PandocMonad m - => String -- ^ Tag name - -> MuseParser m (Attr, String) -htmlElement tag = try $ do - (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar endtag - return (htmlAttrToPandoc attr, content) +openTag :: PandocMonad m => String -> MuseParser m [(String, String)] +openTag tag = try $ + char '<' *> string tag *> manyTill attr (char '>') where - endtag = void $ htmlTag (~== TagClose tag) + attr = try $ (,) + <$ many1 spaceChar + <*> many1 (noneOf "=\n") + <* string "=\"" + <*> manyTill (noneOf "\"") (char '"') -htmlBlock :: PandocMonad m - => String -- ^ Tag name - -> MuseParser m (Attr, String) -htmlBlock tag = try $ do - many spaceChar - res <- htmlElement tag - manyTill spaceChar eol - return res +closeTag :: PandocMonad m => String -> MuseParser m () +closeTag tag = try $ string "</" *> string tag *> void (char '>') -- | Convert HTML attributes to Pandoc 'Attr' -htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc :: [(String, String)] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs @@ -211,15 +212,12 @@ parseHtmlContent :: PandocMonad m => String -- ^ Tag name -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ do - many spaceChar - pos <- getPosition - (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + indent <- getIndent + attr <- openTag tag manyTill spaceChar eol - content <- parseBlocksTill $ try $ count (sourceColumn pos - 1) spaceChar >> endtag + content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline return (htmlAttrToPandoc attr, content) - where - endtag = void $ htmlTag (~== TagClose tag) -- ** Directive parsers @@ -228,21 +226,19 @@ parseDirectiveKey :: PandocMonad m => MuseParser m String parseDirectiveKey = char '#' *> many (letter <|> char '-') parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) -parseEmacsDirective = do - key <- parseDirectiveKey - spaceChar - value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol - return (key, value) +parseEmacsDirective = (,) + <$> parseDirectiveKey + <* spaceChar + <*> (trimInlinesF . mconcat <$> manyTill inline' eol) parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) -parseAmuseDirective = do - key <- parseDirectiveKey - many1 spaceChar - value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective - many blankline - return (key, value) +parseAmuseDirective = (,) + <$> parseDirectiveKey + <* many1 spaceChar + <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective) + <* many blankline where - endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey)) + endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey)) directive :: PandocMonad m => MuseParser m () directive = do @@ -254,17 +250,20 @@ directive = do -- ** Block parsers +-- | Parse section contents until EOF or next header parseBlocks :: PandocMonad m => MuseParser m (F Blocks) parseBlocks = try (parseEnd <|> + nextSection <|> blockStart <|> listStart <|> paraStart) where + nextSection = mempty <$ lookAhead headingStart parseEnd = mempty <$ eof - blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock) - <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks) + blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock) + <*> parseBlocks listStart = do updateState (\st -> st { museInPara = False }) uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) @@ -273,6 +272,13 @@ parseBlocks = uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id +-- | Parse section that starts with a header +parseSection :: PandocMonad m + => MuseParser m (F Blocks) +parseSection = + ((B.<>) <$> emacsHeading <*> parseBlocks) <|> + (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks) + parseBlocksTill :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks) @@ -347,31 +353,32 @@ blockElements = do -- | Parse a line comment, starting with @;@ in the first column. comment :: PandocMonad m => MuseParser m (F Blocks) -comment = try $ do - getPosition >>= \pos -> guard (sourceColumn pos == 1) - char ';' - optional (spaceChar >> many (noneOf "\n")) - eol - return mempty +comment = try $ mempty + <$ firstColumn + <* char ';' + <* optional (spaceChar *> many (noneOf "\n")) + <* eol -- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters. separator :: PandocMonad m => MuseParser m (F Blocks) -separator = try $ do - string "----" - many $ char '-' - many spaceChar - eol - return $ return B.horizontalRule +separator = try $ pure B.horizontalRule + <$ string "----" + <* many (char '-') + <* many spaceChar + <* eol + +headingStart :: PandocMonad m => MuseParser m (String, Int) +headingStart = try $ (,) + <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) + <* firstColumn + <*> fmap length (many1 $ char '*') + <* spaceChar -- | Parse a single-line heading. emacsHeading :: PandocMonad m => MuseParser m (F Blocks) emacsHeading = try $ do guardDisabled Ext_amuse - anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol) - getPosition >>= \pos -> guard (sourceColumn pos == 1) - level <- fmap length $ many1 $ char '*' - guard $ level <= 5 - spaceChar + (anchorId, level) <- headingStart content <- trimInlinesF . mconcat <$> manyTill inline eol attr <- registerHeader (anchorId, [], []) (runF content def) return $ B.headerWith attr level <$> content @@ -383,11 +390,7 @@ amuseHeadingUntil :: PandocMonad m -> MuseParser m (F Blocks, a) amuseHeadingUntil end = try $ do guardEnabled Ext_amuse - anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol) - getPosition >>= \pos -> guard (sourceColumn pos == 1) - level <- fmap length $ many1 $ char '*' - guard $ level <= 5 - spaceChar + (anchorId, level) <- headingStart (content, e) <- paraContentsUntil end attr <- registerHeader (anchorId, [], []) (runF content def) return (B.headerWith attr level <$> content, e) @@ -395,33 +398,28 @@ amuseHeadingUntil end = try $ do -- | Parse an example between @{{{@ and @}}}@. -- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation. example :: PandocMonad m => MuseParser m (F Blocks) -example = try $ do - string "{{{" - optional blankline - contents <- manyTill anyChar $ try (optional blankline >> string "}}}") - return $ return $ B.codeBlock contents +example = try $ pure . B.codeBlock + <$ string "{{{" + <* optional blankline + <*> manyTill anyChar (try (optional blankline *> string "}}}")) -- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) -exampleTag = try $ do - (attr, contents) <- htmlBlock "example" - return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents +exampleTag = try $ fmap pure $ B.codeBlockWith + <$ many spaceChar + <*> (htmlAttrToPandoc <$> openTag "example") + <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "example")) + <* manyTill spaceChar eol -- | Parse a @\<literal>@ tag as a raw block. -- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'. literalTag :: PandocMonad m => MuseParser m (F Blocks) -literalTag = try $ do - many spaceChar - (TagOpen _ attr, _) <- htmlTag (~== TagOpen "literal" []) - manyTill spaceChar eol - content <- manyTill anyChar endtag - manyTill spaceChar eol - return $ return $ rawBlock (htmlAttrToPandoc attr, content) - where - endtag = void $ htmlTag (~== TagClose "literal") - -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs - rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content +literalTag = try $ fmap pure $ B.rawBlock + <$ many spaceChar + <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + <* manyTill spaceChar eol + <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "literal")) + <* manyTill spaceChar eol -- | Parse @\<center>@ tag. -- Currently it is ignored as Pandoc cannot represent centered blocks. @@ -459,25 +457,27 @@ playTag = do fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play" verseLine :: PandocMonad m => MuseParser m (F Inlines) -verseLine = do - indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty - rest <- manyTill (choice inlineList) newline - return $ trimInlinesF $ mconcat (pure indent : rest) - -verseLines :: PandocMonad m => MuseParser m (F Blocks) -verseLines = do - lns <- many verseLine - return $ B.lineBlock <$> sequence lns +verseLine = (<>) + <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' '))) + <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) -- | Parse @\<verse>@ tag. verseTag :: PandocMonad m => MuseParser m (F Blocks) -verseTag = do - (_, content) <- htmlBlock "verse" - parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content) +verseTag = try $ do + indent <- getIndent + openTag "verse" + manyTill spaceChar eol + content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse") + manyTill spaceChar eol + return $ B.lineBlock <$> content -- | Parse @\<comment>@ tag. commentTag :: PandocMonad m => MuseParser m (F Blocks) -commentTag = htmlBlock "comment" >> return mempty +commentTag = try $ mempty + <$ many spaceChar + <* openTag "comment" + <* manyTill anyChar (closeTag "comment") + <* manyTill spaceChar eol -- | Parse paragraph contents. paraContentsUntil :: PandocMonad m @@ -485,7 +485,7 @@ paraContentsUntil :: PandocMonad m -> MuseParser m (F Inlines, a) paraContentsUntil end = do updateState (\st -> st { museInPara = True }) - (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) + (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end) updateState (\st -> st { museInPara = False }) return (trimInlinesF $ mconcat l, e) @@ -499,9 +499,10 @@ paraUntil end = do first (fmap B.para) <$> paraContentsUntil end noteMarker :: PandocMonad m => MuseParser m String -noteMarker = try $ do - char '[' - (:) <$> oneOf "123456789" <*> manyTill digit (char ']') +noteMarker = try $ (:) + <$ char '[' + <*> oneOf "123456789" + <*> manyTill digit (char ']') -- Amusewiki version of note -- Parsing is similar to list item, except that note marker is used instead of list marker @@ -541,27 +542,15 @@ emacsNoteBlock = try $ do -- Verse markup -- -lineVerseLine :: PandocMonad m => MuseParser m (F Inlines) -lineVerseLine = try $ do - string "> " - indent <- many (char ' ' >> pure '\160') - let indentEl = if null indent then mempty else B.str indent - rest <- manyTill (choice inlineList) eol - return $ trimInlinesF $ mconcat (pure indentEl : rest) - -blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines) -blanklineVerseLine = try $ do - char '>' - blankline - pure mempty - -- | Parse a line block indicated by @\'>\'@ characters. lineBlock :: PandocMonad m => MuseParser m (F Blocks) lineBlock = try $ do - many spaceChar - col <- sourceColumn <$> getPosition - lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1)) + indent <- getIndent + lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) return $ B.lineBlock <$> sequence lns + where + blankVerseLine = try $ mempty <$ char '>' <* blankline + nonblankVerseLine = try (string "> ") *> verseLine -- *** List parsers @@ -573,7 +562,7 @@ bulletListItemsUntil indent end = try $ do char '-' void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) return (x:xs, e) -- | Parse a bullet list. @@ -581,19 +570,9 @@ bulletListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) bulletListUntil end = try $ do - many spaceChar - pos <- getPosition - let indent = sourceColumn pos - 1 + indent <- getIndent guard $ indent /= 0 - (items, e) <- bulletListItemsUntil indent end - return (B.bulletList <$> sequence items, e) - --- | Parses an ordered list marker and returns list attributes. -anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes -anyMuseOrderedListMarker = do - (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha - char '.' - return (start, style, Period) + first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end museOrderedListMarker :: PandocMonad m => ListNumberStyle @@ -620,7 +599,7 @@ orderedListItemsUntil indent style end = pos <- getPosition void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end) + (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) return (x:xs, e) -- | Parse an ordered list. @@ -628,14 +607,12 @@ orderedListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) orderedListUntil end = try $ do - many spaceChar - pos <- getPosition - let indent = sourceColumn pos - 1 + indent <- getIndent guard $ indent /= 0 - p@(_, style, _) <- anyMuseOrderedListMarker - guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] - (items, e) <- orderedListItemsUntil indent style end - return (B.orderedListWith p <$> sequence items, e) + (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha + char '.' + first (fmap (B.orderedListWith (start, style, Period)) . sequence) + <$> orderedListItemsUntil indent style end descriptionsUntil :: PandocMonad m => Int @@ -644,7 +621,7 @@ descriptionsUntil :: PandocMonad m descriptionsUntil indent end = do void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) return (x:xs, e) definitionListItemsUntil :: PandocMonad m @@ -656,8 +633,8 @@ definitionListItemsUntil indent end = where continuation = try $ do pos <- getPosition - term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::") - (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end)) + term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::") + (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end)) let xx = (,) <$> term <*> sequence x return (xx:xs, e) @@ -666,9 +643,7 @@ definitionListUntil :: PandocMonad m => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) definitionListUntil end = try $ do - many spaceChar - pos <- getPosition - let indent = sourceColumn pos - 1 + indent <- getIndent guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end @@ -713,7 +688,7 @@ museAppendElement element tbl = tableCell :: PandocMonad m => MuseParser m (F Blocks) tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) - where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol + where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement]) tableElements = sequence <$> (tableParseElement `sepEndBy1` eol) @@ -735,11 +710,10 @@ tableParseElement = tableParseHeader tableParseRow :: PandocMonad m => Int -- ^ Number of separator characters -> MuseParser m (F [Blocks]) -tableParseRow n = try $ do - fields <- tableCell `sepBy2` fieldSep - return $ sequence fields - where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) - fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) +tableParseRow n = try $ + sequence <$> (tableCell `sepBy2` fieldSep) + where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p) + fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline)) -- | Parse a table header row. tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement) @@ -755,53 +729,51 @@ tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3 -- | Parse table caption. tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement) -tableParseCaption = try $ do - many spaceChar - string "|+" - fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) +tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat + <$ many spaceChar + <* string "|+" + <*> many1Till inline (try $ string "+|") -- ** Inline parsers -inlineList :: PandocMonad m => [MuseParser m (F Inlines)] -inlineList = [ whitespace - , br - , anchor - , footnote - , strong - , strongTag - , emph - , emphTag - , underlined - , superscriptTag - , subscriptTag - , strikeoutTag - , verbatimTag - , classTag - , nbsp - , link - , code - , codeTag - , mathTag - , inlineLiteralTag - , str - , symbol - ] +inline' :: PandocMonad m => MuseParser m (F Inlines) +inline' = whitespace + <|> br + <|> anchor + <|> footnote + <|> strong + <|> strongTag + <|> emph + <|> emphTag + <|> underlined + <|> superscriptTag + <|> subscriptTag + <|> strikeoutTag + <|> verbatimTag + <|> classTag + <|> nbsp + <|> linkOrImage + <|> code + <|> codeTag + <|> mathTag + <|> inlineLiteralTag + <|> str + <|> symbol + <?> "inline" inline :: PandocMonad m => MuseParser m (F Inlines) -inline = endline <|> choice inlineList <?> "inline" +inline = endline <|> inline' -- | Parse a soft break. endline :: PandocMonad m => MuseParser m (F Inlines) -endline = try $ do - newline - notFollowedBy blankline - return $ return B.softbreak +endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline parseAnchor :: PandocMonad m => MuseParser m String -parseAnchor = try $ do - getPosition >>= \pos -> guard (sourceColumn pos == 1) - char '#' - (:) <$> letter <*> many (letter <|> digit <|> char '-') +parseAnchor = try $ (:) + <$ firstColumn + <* char '#' + <*> letter + <*> many (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -812,7 +784,7 @@ anchor = try $ do -- | Parse a footnote reference. footnote :: PandocMonad m => MuseParser m (F Inlines) footnote = try $ do - inLink <- museInLink <$> getState + inLink <- asks museInLink guard $ not inLink ref <- noteMarker return $ do @@ -825,33 +797,38 @@ footnote = try $ do return $ B.note contents' whitespace :: PandocMonad m => MuseParser m (F Inlines) -whitespace = try $ do - skipMany1 spaceChar - return $ return B.space +whitespace = try $ pure B.space <$ skipMany1 spaceChar -- | Parse @\<br>@ tag. br :: PandocMonad m => MuseParser m (F Inlines) -br = try $ do - string "<br>" - return $ return B.linebreak +br = try $ pure B.linebreak <$ string "<br>" emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) emphasisBetween c = try $ enclosedInlines c c +-- | Parses material enclosed between start and end parsers. +enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser + -> ParserT s st m end -- ^ end parser + -> ParserT s st m a -- ^ content parser (to be used repeatedly) + -> ParserT s st m [a] +enclosed start end parser = try $ + start *> notFollowedBy spaceChar *> many1Till parser end + enclosedInlines :: (PandocMonad m, Show a, Show b) => MuseParser m a -> MuseParser m b -> MuseParser m (F Inlines) -enclosedInlines start end = try $ - trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter)) +enclosedInlines start end = try $ trimInlinesF . mconcat + <$> enclosed (atStart start) end inline + <* notFollowedBy (satisfy isAlphaNum) -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m => String -- ^ Tag name -> MuseParser m (F Inlines) -inlineTag tag = try $ do - htmlTag (~== TagOpen tag []) - mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag)) +inlineTag tag = try $ mconcat + <$ openTag tag + <*> manyTill inline (closeTag tag) -- | Parse strong inline markup, indicated by @**@. strong :: PandocMonad m => MuseParser m (F Inlines) @@ -864,9 +841,9 @@ emph = fmap B.emph <$> emphasisBetween (char '*') -- | Parse underline inline markup, indicated by @_@. -- Supported only in Emacs Muse mode, not Text::Amuse. underlined :: PandocMonad m => MuseParser m (F Inlines) -underlined = do - guardDisabled Ext_amuse -- Supported only by Emacs Muse - fmap underlineSpan <$> emphasisBetween (char '_') +underlined = fmap underlineSpan + <$ guardDisabled Ext_amuse -- Supported only by Emacs Muse + <*> emphasisBetween (char '_') -- | Parse @\<strong>@ tag. strongTag :: PandocMonad m => MuseParser m (F Inlines) @@ -890,21 +867,20 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del" -- | Parse @\<verbatim>@ tag. verbatimTag :: PandocMonad m => MuseParser m (F Inlines) -verbatimTag = return . B.text . snd <$> htmlElement "verbatim" +verbatimTag = return . B.text + <$ openTag "verbatim" + <*> manyTill anyChar (closeTag "verbatim") -- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" []) - res <- manyTill inline (void $ htmlTag (~== TagClose "class")) - let classes = maybe [] words $ lookup "name" attrs + classes <- maybe [] words . lookup "name" <$> openTag "class" + res <- manyTill inline $ closeTag "class" return $ B.spanWith ("", classes, []) <$> mconcat res -- | Parse "~~" as nonbreaking space. nbsp :: PandocMonad m => MuseParser m (F Inlines) -nbsp = try $ do - string "~~" - return $ return $ B.str "\160" +nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) @@ -914,26 +890,27 @@ code = try $ do guard $ not $ null contents guard $ head contents `notElem` " \t\n" guard $ last contents `notElem` " \t\n" - notFollowedBy $ satisfy isLetter + notFollowedBy $ satisfy isAlphaNum return $ return $ B.code contents -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) -codeTag = return . uncurry B.codeWith <$> htmlElement "code" +codeTag = fmap pure $ B.codeWith + <$> (htmlAttrToPandoc <$> openTag "code") + <*> manyTill anyChar (closeTag "code") -- | Parse @\<math>@ tag. -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) -mathTag = return . B.math . snd <$> htmlElement "math" +mathTag = return . B.math + <$ openTag "math" + <*> manyTill anyChar (closeTag "math") -- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) -inlineLiteralTag = - (return . rawInline) <$> htmlElement "literal" - where - -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs - rawInline (attrs, content) = B.rawInline (format attrs) content +inlineLiteralTag = try $ fmap pure $ B.rawInline + <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + <*> manyTill anyChar (closeTag "literal") str :: PandocMonad m => MuseParser m (F Inlines) str = return . B.str <$> many1 alphaNum <* updateLastStrPos @@ -942,29 +919,58 @@ symbol :: PandocMonad m => MuseParser m (F Inlines) symbol = return . B.str <$> count 1 nonspaceChar -- | Parse a link or image. -link :: PandocMonad m => MuseParser m (F Inlines) -link = try $ do - st <- getState - guard $ not $ museInLink st - setState $ st{ museInLink = True } - (url, content) <- linkText - updateState (\state -> state { museInLink = False }) - return $ case stripPrefix "URL:" url of - Nothing -> if isImageUrl url - then B.image url "" <$> fromMaybe (return mempty) content - else B.link url "" <$> fromMaybe (return $ B.str url) content - Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content - where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el - imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - isImageUrl = (`elem` imageExtensions) . takeExtension +linkOrImage :: PandocMonad m => MuseParser m (F Inlines) +linkOrImage = try $ do + inLink <- asks museInLink + guard $ not inLink + local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link) linkContent :: PandocMonad m => MuseParser m (F Inlines) -linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]") +linkContent = trimInlinesF . mconcat + <$ char '[' + <*> manyTill inline (char ']') + +-- | Parse a link starting with @URL:@ +explicitLink :: PandocMonad m => MuseParser m (F Inlines) +explicitLink = try $ do + string "[[URL:" + url <- manyTill anyChar $ char ']' + content <- option (pure $ B.str url) linkContent + char ']' + return $ B.link url "" <$> content -linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines)) -linkText = do +image :: PandocMonad m => MuseParser m (F Inlines) +image = try $ do + string "[[" + (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']') + content <- option mempty linkContent + char ']' + let widthAttr = case align of + Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")] + _ -> maybeToList (("width",) . (++ "%") <$> width) + let alignClass = case align of + Just 'r' -> ["align-right"] + Just 'l' -> ["align-left"] + Just 'f' -> [] + _ -> [] + return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + imageExtension = choice (try . string <$> imageExtensions) + imageExtensionAndOptions = do + ext <- imageExtension + (width, align) <- option (Nothing, Nothing) imageAttrs + return (ext, width, align) + imageAttrs = (,) + <$ many1 spaceChar + <*> optionMaybe (many1 digit) + <* many spaceChar + <*> optionMaybe (oneOf "rlf") + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do string "[[" url <- manyTill anyChar $ char ']' content <- optionMaybe linkContent char ']' - return (url, content) + return $ B.link url "" <$> fromMaybe (return $ B.str url) content diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index d3db3a9e2..9e8221248 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -100,7 +100,7 @@ liftA fun a = a >>^ fun -- | Duplicate a value to subsequently feed it into different arrows. -- Can almost always be replaced with '(&&&)', 'keepingTheValue', -- or even '(|||)'. --- Aequivalent to +-- Equivalent to -- > returnA &&& returnA duplicate :: (Arrow a) => a b (b,b) duplicate = arr $ join (,) @@ -114,7 +114,7 @@ infixr 2 >>% -- | Duplicate a value and apply an arrow to the second instance. --- Aequivalent to +-- Equivalent to -- > \a -> duplicate >>> second a -- or -- > \a -> returnA &&& a diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 6d96897aa..e76bbf5cf 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -50,7 +50,7 @@ class (Eq nsID, Ord nsID) => NameSpaceID nsID where getNamespaceID :: NameSpaceIRI -> NameSpaceIRIs nsID -> Maybe (NameSpaceIRIs nsID, nsID) - -- | Given a namespace id, lookup its IRI. May be overriden for performance. + -- | Given a namespace id, lookup its IRI. May be overridden for performance. getIRI :: nsID -> NameSpaceIRIs nsID -> Maybe NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 616d9290b..45c6cd58c 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -61,7 +61,7 @@ import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe --- | Aequivalent to +-- | Equivalent to -- > foldr (.) id -- where '(.)' are 'id' are the ones from "Control.Category" -- and 'foldr' is the one from "Data.Foldable". @@ -72,7 +72,7 @@ import Data.Maybe composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a composition = F.foldr (<<<) Cat.id --- | Aequivalent to +-- | Equivalent to -- > foldr (flip (.)) id -- where '(.)' are 'id' are the ones from "Control.Category" -- and 'foldr' is the one from "Data.Foldable". @@ -133,9 +133,7 @@ class Lookupable a where -- can be used directly in almost any case. readLookupables :: (Lookupable a) => String -> [(a,String)] readLookupables s = [ (a,rest) | (word,rest) <- lex s, - let result = lookup word lookupTable, - isJust result, - let Just a = result + a <- maybeToList (lookup word lookupTable) ] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 81392e16b..2327ea908 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -261,7 +261,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA -- The resulting converter even behaves like an identity converter on the -- value level. -- --- Aequivalent to +-- Equivalent to -- -- > \v x a -> convertingExtraState v (returnV x >>> a) -- diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index e0444559b..6a1682829 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -138,7 +138,7 @@ fontPitchReader = executeIn NsOffice "font-face-decls" ( lookupDefaultingAttr NsStyle "font-pitch" )) >>?^ ( M.fromList . foldl accumLegalPitches [] ) - ) + ) `ifFailedDo` (returnV (Right M.empty)) where accumLegalPitches ls (Nothing,_) = ls accumLegalPitches ls (Just n,p) = (n,p):ls @@ -342,7 +342,7 @@ instance Read XslUnit where readsPrec _ _ = [] -- | Rough conversion of measures into millimetres. --- Pixels and em's are actually implementation dependant/relative measures, +-- Pixels and em's are actually implementation dependent/relative measures, -- so I could not really easily calculate anything exact here even if I wanted. -- But I do not care about exactness right now, as I only use measures -- to determine if a paragraph is "indented" or not. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 888cd9307..1c52c3477 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -40,7 +40,7 @@ import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, - originalLang, translateLang) + originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) @@ -314,9 +314,6 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsCode :: [(String, String)] -> Bool - exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" - exportsResults :: [(String, String)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" @@ -743,7 +740,7 @@ latexEnd envName = try $ -- --- Footnote defintions +-- Footnote definitions -- noteBlock :: PandocMonad m => OrgParser m (F Blocks) noteBlock = try $ do diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c9465581a..7d55892fe 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -17,8 +16,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2018 Albert Krewinkel @@ -45,7 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import qualified Data.Map as Map +import qualified Data.Set as Set import qualified Text.Pandoc.Builder as B -- @@ -60,7 +58,7 @@ documentTree :: PandocMonad m documentTree blocks inline = do initialBlocks <- blocks headlines <- sequence <$> manyTill (headline blocks inline 1) eof - title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState + title <- fmap docTitle . orgStateMeta <$> getState return $ do headlines' <- headlines initialBlocks' <- initialBlocks @@ -70,19 +68,11 @@ documentTree blocks inline = do , headlineTodoMarker = Nothing , headlineText = B.fromList title' , headlineTags = mempty + , headlinePlanning = emptyPlanning , headlineProperties = mempty , headlineContents = initialBlocks' , headlineChildren = headlines' } - where - getTitle :: Map.Map String MetaValue -> [Inline] - getTitle metamap = - case Map.lookup "title" metamap of - Just (MetaInlines inlns) -> inlns - _ -> [] - -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) -- | Create a tag containing the given string. toTag :: String -> Tag @@ -117,6 +107,7 @@ data Headline = Headline , headlineTodoMarker :: Maybe TodoMarker , headlineText :: Inlines , headlineTags :: [Tag] + , headlinePlanning :: PlanningInfo -- ^ subtree planning information , headlineProperties :: Properties , headlineContents :: Blocks , headlineChildren :: [Headline] @@ -136,6 +127,7 @@ headline blocks inline lvl = try $ do title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle tags <- option [] headerTags newline + planning <- option emptyPlanning planningInfo properties <- option mempty propertiesDrawer contents <- blocks children <- many (headline blocks inline (level + 1)) @@ -148,6 +140,7 @@ headline blocks inline lvl = try $ do , headlineTodoMarker = todoKw , headlineText = title' , headlineTags = tags + , headlinePlanning = planning , headlineProperties = properties , headlineContents = contents' , headlineChildren = children' @@ -158,22 +151,27 @@ headline blocks inline lvl = try $ do headerTags :: Monad m => OrgParser m [Tag] headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + let tag = orgTagWord <* char ':' in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@Headline {..} = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels +headlineToBlocks hdln = do + maxLevel <- getExportSetting exportHeadlineLevels + let tags = headlineTags hdln + let text = headlineText hdln + let level = headlineLevel hdln + shouldNotExport <- hasDoNotExportTag tags case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln + _ | shouldNotExport -> return mempty + _ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln + _ | isCommentTitle text -> return mempty + _ | maxLevel <= level -> headlineToHeaderWithList hdln + _ | otherwise -> headlineToHeaderWithContents hdln -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") +hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool +hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState + where containsExcludedTag s = any (`Set.member` s) tags isArchiveTag :: Tag -> Bool isArchiveTag = (== toTag "ARCHIVE") @@ -182,8 +180,9 @@ isArchiveTag = (== toTag "ARCHIVE") -- FIXME: This accesses builder internals not intended for use in situations -- like these. Replace once keyword parsing is supported. isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False +isCommentTitle inlns = case B.toList inlns of + (Str "COMMENT":_) -> True + _ -> False archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks archivedHeadlineToBlocks hdln = do @@ -194,17 +193,23 @@ archivedHeadlineToBlocks hdln = do ArchivedTreesHeadlineOnly -> headlineToHeader hdln headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@Headline {..} = do +headlineToHeaderWithList hdln = do maxHeadlineLevels <- getExportSetting exportHeadlineLevels header <- headlineToHeader hdln - listElements <- mapM headlineToBlocks headlineChildren + listElements <- mapM headlineToBlocks (headlineChildren hdln) + planningBlock <- planningToBlock (headlinePlanning hdln) let listBlock = if null listElements then mempty else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel + let headerText = if maxHeadlineLevels == headlineLevel hdln then header else flattenHeader header - return $ headerText <> headlineContents <> listBlock + return . mconcat $ + [ headerText + , headlineContents hdln + , planningBlock + , listBlock + ] where flattenHeader :: Blocks -> Blocks flattenHeader blks = @@ -213,27 +218,28 @@ headlineToHeaderWithList hdln@Headline {..} = do _ -> mempty headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@Headline {..} = do +headlineToHeaderWithContents hdln = do header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren - return $ header <> headlineContents <> childrenBlocks + planningBlock <- planningToBlock (headlinePlanning hdln) + childrenBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren hdln) + return $ header <> planningBlock <> headlineContents hdln <> childrenBlocks headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader Headline {..} = do +headlineToHeader hdln = do exportTodoKeyword <- getExportSetting exportWithTodoKeywords exportTags <- getExportSetting exportWithTags let todoText = if exportTodoKeyword - then case headlineTodoMarker of + then case headlineTodoMarker hdln of Just kw -> todoKeywordToInlines kw <> B.space Nothing -> mempty else mempty - let text = todoText <> headlineText <> + let text = todoText <> headlineText hdln <> if exportTags - then tagsToInlines headlineTags + then tagsToInlines (headlineTags hdln) else mempty - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text + let propAttr = propertiesToAttr (headlineProperties hdln) + attr <- registerHeader propAttr (headlineText hdln) + return $ B.headerWith attr (headlineLevel hdln) text todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do @@ -277,9 +283,60 @@ tagsToInlines tags = tagSpan :: Tag -> Inlines -> Inlines tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)]) +-- | Render planning info as a block iff the respective export setting is +-- enabled. +planningToBlock :: Monad m => PlanningInfo -> OrgParser m Blocks +planningToBlock planning = do + includePlanning <- getExportSetting exportWithPlanning + return $ + if includePlanning + then B.plain . mconcat . intersperse B.space . filter (/= mempty) $ + [ datumInlines planningClosed "CLOSED" + , datumInlines planningDeadline "DEADLINE" + , datumInlines planningScheduled "SCHEDULED" + ] + else mempty + where + datumInlines field name = + case field planning of + Nothing -> mempty + Just time -> B.strong (B.str name <> B.str ":") + <> B.space + <> B.emph (B.str time) + +-- | An Org timestamp, including repetition marks. TODO: improve +type Timestamp = String + +timestamp :: Monad m => OrgParser m Timestamp +timestamp = try $ do + openChar <- oneOf "<[" + let isActive = openChar == '<' + let closeChar = if isActive then '>' else ']' + content <- many1Till anyChar (char closeChar) + return (openChar : content ++ [closeChar]) + +-- | Planning information for a subtree/headline. +data PlanningInfo = PlanningInfo + { planningClosed :: Maybe Timestamp + , planningDeadline :: Maybe Timestamp + , planningScheduled :: Maybe Timestamp + } +emptyPlanning :: PlanningInfo +emptyPlanning = PlanningInfo Nothing Nothing Nothing - +-- | Read a single planning-related and timestamped line. +planningInfo :: Monad m => OrgParser m PlanningInfo +planningInfo = try $ do + updaters <- many1 planningDatum <* skipSpaces <* newline + return $ foldr ($) emptyPlanning updaters + where + planningDatum = skipSpaces *> choice + [ updateWith (\s p -> p { planningScheduled = Just s}) "SCHEDULED" + , updateWith (\d p -> p { planningDeadline = Just d}) "DEADLINE" + , updateWith (\c p -> p { planningClosed = Just c}) "CLOSED" + ] + updateWith fn cs = fn <$> (string cs *> char ':' *> skipSpaces *> timestamp) -- | Read a :PROPERTIES: drawer and return the key/value pairs contained -- within. diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index d02eb37c5..f79ee0d64 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -69,7 +69,7 @@ exportSetting = choice , integerSetting "H" (\val es -> es { exportHeadlineLevels = val }) , ignoredSetting "inline" , ignoredSetting "num" - , ignoredSetting "p" + , booleanSetting "p" (\val es -> es { exportWithPlanning = val }) , ignoredSetting "pri" , ignoredSetting "prop" , ignoredSetting "stat" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 7d1568b80..a5335ca57 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, - originalLang, translateLang) + originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) import qualified Text.Pandoc.Builder as B @@ -510,7 +510,7 @@ anchor = try $ do <* string ">>" <* skipSpaces --- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors +-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. solidify :: String -> String @@ -525,11 +525,13 @@ inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption + opts <- option [] $ try (enclosedByPair '[' ']' inlineBlockOption) + <|> (mempty <$ string "[]") inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode + returnF $ (if exportsCode opts then codeInlineBlck else mempty) where inlineBlockOption :: PandocMonad m => OrgParser m (String, String) inlineBlockOption = try $ do diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 965e33d94..cad1d7123 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -52,6 +52,7 @@ import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M +import qualified Data.Set as Set import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -158,6 +159,7 @@ optionLine = try $ do "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence "macro" -> macroDefinition >>= updateState . registerMacro + "exclude_tags" -> excludedTagList >>= updateState . setExcludedTags "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero @@ -190,6 +192,18 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) +excludedTagList :: Monad m => OrgParser m [Tag] +excludedTagList = do + skipSpaces + map Tag <$> many (orgTagWord <* skipSpaces) <* newline + +setExcludedTags :: [Tag] -> OrgParserState -> OrgParserState +setExcludedTags tagList st = + let finalSet = if orgStateExcludedTagsChanged st + then foldr Set.insert (orgStateExcludedTags st) tagList + else Set.fromList tagList + in st { orgStateExcludedTags = finalSet, orgStateExcludedTagsChanged = True} + setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState setEmphasisPreChar csMb st = let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 4cb5bb626..59478256f 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState , defaultOrgParserState , OrgParserLocal (..) , OrgNoteRecord + , Tag(..) , HasReaderOptions (..) , HasQuoteContext (..) , HasMacros (..) @@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord] type OrgLinkFormatters = M.Map String (String -> String) -- | Macro expander function type MacroExpander = [String] -> String +-- | Tag +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq, Ord) -- | The states in which a todo item can be data TodoState = Todo | Done @@ -113,6 +117,8 @@ data OrgParserState = OrgParserState -- specified here. , orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis , orgStateEmphasisNewlines :: Maybe Int + , orgStateExcludedTags :: Set.Set Tag + , orgStateExcludedTagsChanged :: Bool , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String @@ -183,6 +189,8 @@ defaultOrgParserState = OrgParserState , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def + , orgStateExcludedTags = Set.singleton $ Tag "noexport" + , orgStateExcludedTagsChanged = False , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty , orgStateIncludeFiles = [] @@ -260,6 +268,7 @@ data ExportSettings = ExportSettings , exportWithAuthor :: Bool -- ^ Include author in final meta-data , exportWithCreator :: Bool -- ^ Include creator in final meta-data , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportWithPlanning :: Bool -- ^ Keep planning info after headlines , exportWithTags :: Bool -- ^ Keep tags as part of headlines , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers } @@ -280,6 +289,7 @@ defaultExportSettings = ExportSettings , exportWithAuthor = True , exportWithCreator = True , exportWithEmail = True + , exportWithPlanning = False , exportWithTags = True , exportWithTodoKeywords = True } diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index e014de65e..52a346e36 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing , orgArgKey , orgArgWord , orgArgWordChar + , orgTagWord + , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) , many1Till @@ -137,14 +139,13 @@ anyLine = <* updateLastPreCharPos <* updateLastForbiddenCharPos --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. +-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character +-- allowed before emphasised text. parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a parseFromString parser str' = do - oldLastPreCharPos <- orgStateLastPreCharPos <$> getState updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' - updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + updateState $ \s -> s { orgStateLastPreCharPos = Nothing } return result -- | Skip one or more tab or space characters. @@ -221,3 +222,9 @@ orgArgWord = many1 orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" + +orgTagWord :: Monad m => OrgParser m String +orgTagWord = many1 orgTagWordChar + +orgTagWordChar :: Monad m => OrgParser m Char +orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 17fe34738..71d1dd517 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Org.Shared , isImageFilename , originalLang , translateLang + , exportsCode ) where import Prelude @@ -96,3 +97,6 @@ translateLang cs = "sh" -> "bash" "sqlite" -> "sql" _ -> cs + +exportsCode :: [(String, String)] -> Bool +exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 71a38cf82..28fa7b83e 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -37,7 +37,7 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper) +import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M @@ -172,6 +172,7 @@ parseRST = do docMinusKeys <- concat <$> manyTill (referenceKey <|> anchorDef <|> noteBlock <|> citationBlock <|> + (snd <$> withRaw comment) <|> headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos @@ -1089,7 +1090,7 @@ referenceKey = do targetURI :: Monad m => ParserT [Char] st m [Char] targetURI = do skipSpaces - optional newline + optional $ try $ newline >> notFollowedBy blankline contents <- trim <$> many1 (satisfy (/='\n') <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) @@ -1313,19 +1314,24 @@ table = gridTable False <|> simpleTable False <|> inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws - , whitespace , link - , str - , endline , strong , emph , code , subst , interpretedRole - , smart - , hyphens - , escapedChar - , symbol ] <?> "inline" + , inlineContent ] <?> "inline" + +-- strings, spaces and other characters that can appear either by +-- themselves or within inline markup +inlineContent :: PandocMonad m => RSTParser m Inlines +inlineContent = choice [ whitespace + , str + , endline + , smart + , hyphens + , escapedChar + , symbol ] <?> "inline content" parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) @@ -1368,11 +1374,11 @@ atStart p = do emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> - enclosed (atStart $ char '*') (char '*') inline + enclosed (atStart $ char '*') (char '*') inlineContent strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> - enclosed (atStart $ string "**") (try $ string "**") inline + enclosed (atStart $ string "**") (try $ string "**") inlineContent -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules @@ -1380,7 +1386,6 @@ strong = B.strong . trimInlines . mconcat <$> -- -- TODO: -- - Classes are silently discarded in addNewRole --- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do @@ -1390,12 +1395,12 @@ interpretedRole = try $ do renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of - "sup" -> return $ B.superscript $ B.str contents - "superscript" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "subscript" -> return $ B.subscript $ B.str contents - "emphasis" -> return $ B.emph $ B.str contents - "strong" -> return $ B.strong $ B.str contents + "sup" -> return $ B.superscript $ treatAsText contents + "superscript" -> return $ B.superscript $ treatAsText contents + "sub" -> return $ B.subscript $ treatAsText contents + "subscript" -> return $ B.subscript $ treatAsText contents + "emphasis" -> return $ B.emph $ treatAsText contents + "strong" -> return $ B.strong $ treatAsText contents "rfc-reference" -> return $ rfcLink contents "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents @@ -1406,7 +1411,7 @@ renderRole contents fmt role attr = case role of "title" -> titleRef contents "t" -> titleRef contents "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents - "span" -> return $ B.spanWith attr $ B.str contents + "span" -> return $ B.spanWith attr $ treatAsText contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do customRoles <- stateRstCustomRoles <$> getState @@ -1414,14 +1419,20 @@ renderRole contents fmt role attr = case role of Just (newRole, newFmt, newAttr) -> renderRole contents newFmt newRole newAttr Nothing -> -- undefined role - return $ B.spanWith ("",[],[("role",role)]) (B.str contents) + return $ B.codeWith ("",["interpreted-text"],[("role",role)]) + contents where - titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + treatAsText = B.text . handleEscapes + handleEscapes [] = [] + handleEscapes ('\\':' ':cs) = handleEscapes cs + handleEscapes ('\\':c:cs) = c : handleEscapes cs + handleEscapes (c:cs) = c : handleEscapes cs addClass :: String -> Attr -> Attr addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues) @@ -1445,7 +1456,18 @@ roleAfter = try $ do return (role,contents) unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] -unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar +unmarkedInterpretedText = try $ do + atStart (char '`') + contents <- mconcat <$> (many1 + ( many1 (noneOf "`\\\n") + <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n")) + <|> (string "\n" <* notFollowedBy blankline) + <|> try (string "`" <* + notFollowedBy (() <$ roleMarker) <* + lookAhead (satisfy isAlphaNum)) + )) + char '`' + return contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" @@ -1480,7 +1502,7 @@ explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code label' <- trimInlines . mconcat <$> - manyTill (notFollowedBy (char '`') >> inline) (char '<') + manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') src <- trim <$> manyTill (noneOf ">\n") (char '>') skipSpaces string "`_" diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 1f230ae7e..c3cfedcfb 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -74,9 +74,6 @@ type TWParser = ParserT [Char] ParserState tryMsg :: String -> TWParser m a -> TWParser m a tryMsg msg p = try p <?> msg -skip :: TWParser m a -> TWParser m () -skip parser = parser >> return () - nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do nestlevel <- stateMaxNestingLevel <$> getState @@ -92,7 +89,7 @@ htmlElement tag = tryMsg tag $ do content <- manyTill anyChar (endtag <|> endofinput) return (htmlAttrToPandoc attr, trim content) where - endtag = skip $ htmlTag (~== TagClose tag) + endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse @@ -114,18 +111,15 @@ parseHtmlContentWithAttrs tag parser = do endOfContent = try $ skipMany blankline >> skipSpaces >> eof parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] -parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd +parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p -- -- main parser -- parseTWiki :: PandocMonad m => TWParser m Pandoc -parseTWiki = do - bs <- mconcat <$> many block - spaces - eof - return $ B.doc bs +parseTWiki = + B.doc . mconcat <$> many block <* spaces <* eof -- @@ -158,7 +152,7 @@ separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalR header :: PandocMonad m => TWParser m B.Blocks header = tryMsg "header" $ do string "---" - level <- many1 (char '+') >>= return . length + level <- length <$> many1 (char '+') guard $ level <= 6 classes <- option [] $ string "!!" >> return ["unnumbered"] skipSpaces @@ -167,11 +161,10 @@ header = tryMsg "header" $ do return $ B.headerWith attr level content verbatim :: PandocMonad m => TWParser m B.Blocks -verbatim = (htmlElement "verbatim" <|> htmlElement "pre") - >>= return . (uncurry B.codeBlockWith) +verbatim = uncurry B.codeBlockWith <$> (htmlElement "verbatim" <|> htmlElement "pre") literal :: PandocMonad m => TWParser m B.Blocks -literal = htmlElement "literal" >>= return . rawBlock +literal = rawBlock <$> htmlElement "literal" where format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content @@ -183,7 +176,7 @@ list prefix = choice [ bulletList prefix definitionList :: PandocMonad m => String -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " + indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ " elements <- many $ parseDefinitionListItem (prefix ++ concat indent) return $ B.definitionList elements where @@ -193,7 +186,7 @@ definitionList prefix = tryMsg "definitionList" $ do string (indent ++ "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " - return $ (mconcat term, [line]) + return (mconcat term, [line]) bulletList :: PandocMonad m => String -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ @@ -227,25 +220,24 @@ parseListItem prefix marker = string prefix >> marker >> listItemLine prefix mar listItemLine :: (PandocMonad m, Show a) => String -> TWParser m a -> TWParser m B.Blocks -listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat +listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) where lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) + return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation filterSpaces = reverse . dropWhile (== ' ') . reverse listContinuation = notFollowedBy (string prefix >> marker) >> string " " >> lineContent parseContent = parseFromString' $ many1 $ nestedList <|> parseInline - parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= - return . B.plain . mconcat + parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList) nestedList = list prefix lastNewline = try $ char '\n' <* eof newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList table :: PandocMonad m => TWParser m B.Blocks table = try $ do - tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip + tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline) rows <- many1 tableParseRow return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead where @@ -258,11 +250,11 @@ table = try $ do tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks) tableParseHeader = try $ do char '|' - leftSpaces <- many spaceChar >>= return . length + leftSpaces <- length <$> many spaceChar char '*' content <- tableColumnContent (char '*' >> skipSpaces >> char '|') char '*' - rightSpaces <- many spaceChar >>= return . length + rightSpaces <- length <$> many spaceChar optional tableEndOfRow return (tableAlign leftSpaces rightSpaces, content) where @@ -283,13 +275,13 @@ tableEndOfRow :: PandocMonad m => TWParser m Char tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks -tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat +tableColumnContent end = (B.plain . mconcat) <$> manyTill content (lookAhead $ try end) where content = continuation <|> inline continuation = try $ char '\\' >> newline >> return mempty blockQuote :: PandocMonad m => TWParser m B.Blocks -blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat +blockQuote = (B.blockQuote . mconcat) <$> parseHtmlContent "blockquote" block noautolink :: PandocMonad m => TWParser m B.Blocks noautolink = do @@ -300,15 +292,15 @@ noautolink = do setState $ st{ stateAllowLinks = True } return $ mconcat blocks where - parseContent = parseFromString' $ many $ block + parseContent = parseFromString' $ many block para :: PandocMonad m => TWParser m B.Blocks -para = many1Till inline endOfParaElement >>= return . result . mconcat +para = (result . mconcat) <$> many1Till inline endOfParaElement where endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement endOfInput = try $ skipMany blankline >> skipSpaces >> eof endOfPara = try $ blankline >> skipMany1 blankline - newBlockElement = try $ blankline >> skip blockElements + newBlockElement = try $ blankline >> void blockElements result content = if F.all (==Space) content then mempty else B.para $ B.trimInlines content @@ -340,7 +332,7 @@ inline = choice [ whitespace ] <?> "inline" whitespace :: PandocMonad m => TWParser m B.Inlines -whitespace = (lb <|> regsp) >>= return +whitespace = lb <|> regsp where lb = try $ skipMany spaceChar >> linebreak >> return B.space regsp = try $ skipMany1 spaceChar >> return B.space @@ -362,13 +354,13 @@ enclosed :: (Monoid b, PandocMonad m, Show a) => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where - endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof + endMarker = lookAhead $ void endSpace <|> void (oneOf ".,!?:)|") <|> eof endSpace = (spaceChar <|> newline) >> return B.space macro :: PandocMonad m => TWParser m B.Inlines macro = macroWithParameters <|> withoutParameters where - withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan + withoutParameters = emptySpan <$> enclosed (char '%') (const macroName) emptySpan name = buildSpan name [] mempty macroWithParameters :: PandocMonad m => TWParser m B.Inlines @@ -393,13 +385,13 @@ macroName = do return (first:rest) attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= - return . foldr (either mkContent mkKvs) ([], []) +attributes = foldr (either mkContent mkKvs) ([], []) + <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}') where spnl = skipMany (spaceChar <|> newline) mkContent c ([], kvs) = (c, kvs) mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) - mkKvs kv (cont, rest) = (cont, (kv : rest)) + mkKvs kv (cont, rest) = (cont, kv : rest) attribute :: PandocMonad m => TWParser m (Either String (String, String)) attribute = withKey <|> withoutKey @@ -407,52 +399,50 @@ attribute = withKey <|> withoutKey withKey = try $ do key <- macroName char '=' - parseValue False >>= return . (curry Right key) - withoutKey = try $ parseValue True >>= return . Left - parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities + curry Right key <$> parseValue False + withoutKey = try $ Left <$> parseValue True + parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces) withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) withoutQuotes allowSpaces - | allowSpaces == True = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" + | allowSpaces = many1 $ noneOf "}" + | otherwise = many1 $ noneOf " }" nestedInlines :: (Show a, PandocMonad m) => TWParser m a -> TWParser m B.Inlines nestedInlines end = innerSpace <|> nestedInline where - innerSpace = try $ whitespace <* (notFollowedBy end) + innerSpace = try $ whitespace <* notFollowedBy end nestedInline = notFollowedBy whitespace >> nested inline strong :: PandocMonad m => TWParser m B.Inlines -strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong +strong = try $ B.strong <$> enclosed (char '*') nestedInlines strongHtml :: PandocMonad m => TWParser m B.Inlines -strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) - >>= return . B.strong . mconcat +strongHtml = B.strong . mconcat <$> (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) strongAndEmph :: PandocMonad m => TWParser m B.Inlines -strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong +strongAndEmph = try $ B.emph . B.strong <$> enclosed (string "__") nestedInlines emph :: PandocMonad m => TWParser m B.Inlines -emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph +emph = try $ B.emph <$> enclosed (char '_') nestedInlines emphHtml :: PandocMonad m => TWParser m B.Inlines -emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) - >>= return . B.emph . mconcat +emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) nestedString :: (Show a, PandocMonad m) => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> (count 1 nonspaceChar) +nestedString end = innerSpace <|> count 1 nonspaceChar where innerSpace = try $ many1 spaceChar <* notFollowedBy end boldCode :: PandocMonad m => TWParser m B.Inlines -boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities +boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString htmlComment :: PandocMonad m => TWParser m B.Inlines htmlComment = htmlTag isCommentTag >> return mempty code :: PandocMonad m => TWParser m B.Inlines -code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities +code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do @@ -464,7 +454,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- parseLink - guard $ checkLink (head $ reverse url) + guard $ checkLink (last url) return $ makeLink (text, url) where parseLink = notFollowedBy nop >> (uri <|> emailAddress) @@ -474,17 +464,17 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str +str = B.str <$> (many1 alphaNum <|> count 1 characterReference) nop :: PandocMonad m => TWParser m B.Inlines -nop = try $ (skip exclamation <|> skip nopTag) >> followContent +nop = try $ (void exclamation <|> void nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" - followContent = many1 nonspaceChar >>= return . B.str . fromEntities + followContent = B.str . fromEntities <$> many1 nonspaceChar symbol :: PandocMonad m => TWParser m B.Inlines -symbol = count 1 nonspaceChar >>= return . B.str +symbol = B.str <$> count 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines smart = do @@ -498,17 +488,16 @@ smart = do singleQuoted :: PandocMonad m => TWParser m B.Inlines singleQuoted = try $ do singleQuoteStart - withQuoteContext InSingleQuote $ - many1Till inline singleQuoteEnd >>= - (return . B.singleQuoted . B.trimInlines . mconcat) + withQuoteContext InSingleQuote + (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd) doubleQuoted :: PandocMonad m => TWParser m B.Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> + withQuoteContext InDoubleQuote (doubleQuoteEnd >> return (B.doubleQuoted $ B.trimInlines contents)) - <|> (return $ (B.str "\8220") B.<> contents) + <|> return (B.str "\8220" B.<> contents) link :: PandocMonad m => TWParser m B.Inlines link = try $ do @@ -527,5 +516,5 @@ linkText = do char ']' return (url, "", content) where - linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent + linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index bc3bcaf26..4b65be347 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -540,7 +540,7 @@ wordChunk = try $ do str :: PandocMonad m => ParserT [Char] ParserState m Inlines str = do baseStr <- hyphenedWords - -- RedCloth compliance : if parsed word is uppercase and immediatly + -- RedCloth compliance : if parsed word is uppercase and immediately -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do guard $ all isUpper baseStr diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 5c7507248..8458b05e5 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -22,6 +22,7 @@ import Prelude import Control.Monad import Control.Monad.Except (throwError) import qualified Data.Foldable as F +import Data.List (dropWhileEnd) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -166,7 +167,7 @@ table = try $ do -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows return $B.simpleTable (headers rows) rows where - -- The headers are as many empty srings as the number of columns + -- The headers are as many empty strings as the number of columns -- in the first row headers rows = map (B.plain . B.str) $replicate (length $ head rows) "" @@ -319,7 +320,7 @@ listItem = choice [ bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) bulletItem = try $ do prefix <- many1 $ char '*' - many1 $ char ' ' + many $ char ' ' content <- listItemLine (length prefix) return (LN Bullet (length prefix), B.plain content) @@ -331,7 +332,7 @@ bulletItem = try $ do numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) numberedItem = try $ do prefix <- many1 $ char '#' - many1 $ char ' ' + many $ char ' ' content <- listItemLine (length prefix) return (LN Numbered (length prefix), B.plain content) @@ -346,7 +347,7 @@ listItemLine nest = lineContent >>= parseContent listContinuation = string (replicate nest '+') >> lineContent parseContent x = do parsed <- parseFromString (many1 inline) x - return $ mconcat parsed + return $ mconcat $ dropWhileEnd (== B.space) parsed -- Turn the CODE macro attributes into Pandoc code block attributes. mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index bed49fd46..26dc934a9 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -198,7 +198,7 @@ para = try $ do commentBlock :: T2T Blocks commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment --- Seperator and Strong line treated the same +-- Separator and Strong line treated the same hrule :: T2T Blocks hrule = try $ do spaces @@ -575,8 +575,10 @@ symbol = B.str . (:[]) <$> oneOf specialChars getTarget :: T2T String getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState - let MetaString target = fromMaybe (MetaString "html") mv - return target + return $ case mv of + Just (MetaString target) -> target + Just (MetaInlines [Str target]) -> target + _ -> "html" atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 824a912c3..15f0d991f 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -429,9 +429,7 @@ tableRow = try $ do s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar >> newline)) guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") - tr <- many tableCell - many spaceChar >> char '\n' - return tr + many tableCell <* many spaceChar <* char '\n' tableCell :: PandocMonad m => VwParser m Blocks tableCell = try $ @@ -451,13 +449,13 @@ ph s = try $ do noHtmlPh :: PandocMonad m => VwParser m () noHtmlPh = try $ - () <$ (many spaceChar >> string "%nohtml" >> many spaceChar - >> lookAhead newline) + () <$ many spaceChar <* string "%nohtml" <* many spaceChar + <* lookAhead newline templatePh :: PandocMonad m => VwParser m () templatePh = try $ - () <$ (many spaceChar >> string "%template" >>many (noneOf "\n") - >> lookAhead newline) + () <$ many spaceChar <* string "%template" <* many (noneOf "\n") + <* lookAhead newline -- inline parser @@ -617,10 +615,8 @@ procImgurl :: String -> String procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s inlineMath :: PandocMonad m => VwParser m Inlines -inlineMath = try $ do - char '$' - contents <- many1Till (noneOf "\n") (char '$') - return $ B.math contents +inlineMath = try $ + B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$') tag :: PandocMonad m => VwParser m Inlines tag = try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 26b01bc90..9f48080b8 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -79,6 +79,7 @@ module Text.Pandoc.Shared ( makeMeta, eastAsianLineBreakFilter, underlineSpan, + splitSentences, -- * TagSoup HTML handling renderTags', -- * File handling @@ -94,6 +95,8 @@ module Text.Pandoc.Shared ( -- * for squashing blocks blocksToInlines, blocksToInlines', + blocksToInlinesWithSep, + defaultBlocksSeparator, -- * Safe read safeRead, -- * Temp directory @@ -580,6 +583,31 @@ eastAsianLineBreakFilter = bottomUp go underlineSpan :: Inlines -> Inlines underlineSpan = B.spanWith ("", ["underline"], []) +-- | Returns the first sentence in a list of inlines, and the rest. +breakSentence :: [Inline] -> ([Inline], [Inline]) +breakSentence [] = ([],[]) +breakSentence xs = + let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True + isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False + (as, bs) = break isSentenceEndInline xs + in case bs of + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) + (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) + (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs + +-- | Split a list of inlines into sentences. +splitSentences :: [Inline] -> [[Inline]] +splitSentences xs = + let (sent, rest) = breakSentence xs + in if null rest then [sent] else sent : splitSentences rest -- -- TagSoup HTML handling @@ -712,7 +740,7 @@ schemes = Set.fromList , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" , "z39.50s" - -- Inofficial schemes + -- Unofficial schemes , "doi", "isbn", "javascript", "pmid" ] @@ -757,12 +785,19 @@ blocksToInlinesWithSep sep = mconcat . intersperse sep . map blockToInlines blocksToInlines' :: [Block] -> Inlines -blocksToInlines' = blocksToInlinesWithSep parSep - where parSep = B.space <> B.str "¶" <> B.space +blocksToInlines' = blocksToInlinesWithSep defaultBlocksSeparator blocksToInlines :: [Block] -> [Inline] blocksToInlines = B.toList . blocksToInlines' +-- | Inline elements used to separate blocks when squashing blocks into +-- inlines. +defaultBlocksSeparator :: Inlines +defaultBlocksSeparator = + -- This is used in the pandoc.utils.blocks_to_inlines function. Docs + -- there should be updated if this is changed. + B.space <> B.str "¶" <> B.space + -- -- Safe read diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 4a216af92..13dcb3b61 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -48,11 +48,12 @@ module Text.Pandoc.Translations ( ) where import Prelude -import Data.Aeson.Types (typeMismatch) +import Data.Aeson.Types (Value(..), FromJSON(..)) +import qualified Data.Aeson.Types as Aeson import qualified Data.HashMap.Strict as HM import qualified Data.Map as M import Data.Text as T -import Data.Yaml as Yaml +import qualified Data.YAML as YAML import GHC.Generics (Generic) import Text.Pandoc.Shared (safeRead) import qualified Text.Pandoc.UTF8 as UTF8 @@ -90,7 +91,15 @@ instance FromJSON Term where Just t' -> pure t' Nothing -> fail $ "Invalid Term name " ++ show t - parseJSON invalid = typeMismatch "Term" invalid + parseJSON invalid = Aeson.typeMismatch "Term" invalid + +instance YAML.FromYAML Term where + parseYAML (YAML.Scalar (YAML.SStr t)) = + case safeRead (T.unpack t) of + Just t' -> pure t' + Nothing -> fail $ "Invalid Term name " ++ + show t + parseYAML invalid = YAML.typeMismatch "Term" invalid instance FromJSON Translations where parseJSON (Object hm) = do @@ -102,14 +111,28 @@ instance FromJSON Translations where Just t -> case v of (String s) -> return (t, T.unpack $ T.strip s) - inv -> typeMismatch "String" inv - parseJSON invalid = typeMismatch "Translations" invalid + inv -> Aeson.typeMismatch "String" inv + parseJSON invalid = Aeson.typeMismatch "Translations" invalid + +instance YAML.FromYAML Translations where + parseYAML = YAML.withMap "Translations" $ + \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr) + where addItem (n@(YAML.Scalar (YAML.SStr k)), v) = + case safeRead (T.unpack k) of + Nothing -> YAML.typeMismatch "Term" n + Just t -> + case v of + (YAML.Scalar (YAML.SStr s)) -> + return (t, T.unpack (T.strip s)) + n' -> YAML.typeMismatch "String" n' + addItem (n, _) = YAML.typeMismatch "String" n lookupTerm :: Term -> Translations -> Maybe String lookupTerm t (Translations tm) = M.lookup t tm readTranslations :: String -> Either String Translations readTranslations s = - case Yaml.decodeEither' $ UTF8.fromString s of - Left err' -> Left $ prettyPrintParseException err' - Right t -> Right t + case YAML.decodeStrict $ UTF8.fromString s of + Left err' -> Left err' + Right (t:_) -> Right t + Right [] -> Left "empty YAML document" diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index c1bae7038..60ff269da 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -67,13 +67,14 @@ instance Show UUID where getUUID :: RandomGen g => g -> UUID getUUID gen = - let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8] - -- set variant - i' = i `setBit` 7 `clearBit` 6 - -- set version (0100 for random) - g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 - in - UUID a b c d e f g' h i' j k l m n o p + case take 16 (randoms gen :: [Word8]) of + [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] -> + -- set variant + let i' = i `setBit` 7 `clearBit` 6 + -- set version (0100 for random) + g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + in UUID a b c d e f g' h i' j k l m n o p + _ -> error "not enough random numbers for UUID" -- should not happen getRandomUUID :: IO UUID getRandomUUID = getUUID <$> getStdGen diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 036185282..ffe5b7473 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -44,7 +44,7 @@ import Data.Aeson (Result (..), Value (String), fromJSON, toJSON) import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse, stripPrefix) import qualified Data.Map as M -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust, listToMaybe) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T @@ -126,11 +126,16 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker else spaceChar -- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True +-- or would be interpreted as an AsciiDoc option command +needsEscaping :: String -> Bool +needsEscaping s = beginsWithOrderedListMarker s || isBracketed s + where + beginsWithOrderedListMarker str = + case runParser olMarker defaultParserState "para start" (take 10 str) of + Left _ -> False + Right _ -> True + isBracketed ('[':cs) = listToMaybe (reverse cs) == Just ']' + isBracketed _ = False -- | Convert Pandoc block element to asciidoc. blockToAsciiDoc :: PandocMonad m @@ -146,8 +151,8 @@ blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines -- escape if para starts with ordered list marker - let esc = if beginsWithOrderedListMarker (render Nothing contents) - then text "\\" + let esc = if needsEscaping (render Nothing contents) + then text "{empty}" else empty return $ esc <> contents <> blankline blockToAsciiDoc opts (LineBlock lns) = do @@ -280,7 +285,7 @@ blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline blockToAsciiDoc opts (Div (ident,_,_) bs) = do - let identifier = if null ident then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else "[[" <> text ident <> "]]" contents <- blockListToAsciiDoc opts bs return $ identifier $$ contents @@ -487,6 +492,6 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" inlineToAsciiDoc opts (Span (ident,_,_) ils) = do - let identifier = if null ident then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else "[[" <> text ident <> "]]" contents <- inlineListToAsciiDoc opts ils return $ identifier <> contents diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 98c1101fa..84ea37f38 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -45,7 +45,7 @@ import Network.HTTP (urlEncode) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Shared (isTightList, linesToPara, substitute) +import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) @@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :) inlineToNodes opts (Strikeout xs) = if isEnabled Ext_strikeout opts then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) - else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) + else if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) + else (inlinesToNodes opts xs ++) inlineToNodes opts (Superscript xs) = - ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) + else case traverse toSuperscriptInline xs of + Nothing -> + ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++ + [node (TEXT (T.pack ")")) []]) ++ ) + Just xs' -> (inlinesToNodes opts xs' ++) inlineToNodes opts (Subscript xs) = - ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) + else case traverse toSubscriptInline xs of + Nothing -> + ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++ + [node (TEXT (T.pack ")")) []]) ++ ) + Just xs' -> (inlinesToNodes opts xs' ++) inlineToNodes opts (SmallCaps xs) = - ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] - : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] + : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) + else (inlinesToNodes opts (capitalize xs) ++) inlineToNodes opts (Link _ ils (url,tit)) = (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) -- title beginning with fig: indicates implicit figure @@ -304,6 +320,11 @@ inlineToNodes opts (Math mt str) = (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) DisplayMath -> (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes opts (Span ("",["emoji"],kvs) [Str s]) = do + case lookup "data-emoji" kvs of + Just emojiname | isEnabled Ext_emoji opts -> + (node (TEXT (":" <> T.pack emojiname <> ":")) [] :) + _ -> (node (TEXT (T.pack s)) [] :) inlineToNodes opts (Span attr ils) = let nodes = inlinesToNodes opts ils op = tagWithAttributes opts True False "span" attr @@ -314,3 +335,19 @@ inlineToNodes opts (Span attr ils) = inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing + +toSubscriptInline :: Inline -> Maybe Inline +toSubscriptInline Space = Just Space +toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils +toSubscriptInline (Str s) = Str <$> traverse toSubscript s +toSubscriptInline LineBreak = Just LineBreak +toSubscriptInline SoftBreak = Just SoftBreak +toSubscriptInline _ = Nothing + +toSuperscriptInline :: Inline -> Maybe Inline +toSuperscriptInline Space = Just Space +toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils +toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s +toSuperscriptInline LineBreak = Just LineBreak +toSuperscriptInline SoftBreak = Just SoftBreak +toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 10e996bdb..1f9760442 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -190,10 +190,9 @@ blockToConTeXt (BlockQuote lst) = do blockToConTeXt (CodeBlock _ str) = return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' -blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline -blockToConTeXt b@(RawBlock _ _ ) = do - report $ BlockNotRendered b - return empty +blockToConTeXt b@(RawBlock f str) + | f == Format "context" || f == Format "tex" = return $ text str <> blankline + | otherwise = empty <$ report (BlockNotRendered b) blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" mblang <- fromBCP47 (lookup "lang" kvs) @@ -330,8 +329,7 @@ alignToConTeXt align = case align of AlignDefault -> empty listItemToConTeXt :: PandocMonad m => [Block] -> WM m Doc -listItemToConTeXt list = blockListToConTeXt list >>= - return . ("\\item" $$) . nest 2 +listItemToConTeXt list = (("\\item" $$) . nest 2) <$> blockListToConTeXt list defListItemToConTeXt :: PandocMonad m => ([Inline], [[Block]]) -> WM m Doc defListItemToConTeXt (term, defs) = do @@ -401,11 +399,9 @@ inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" <> space -inlineToConTeXt (RawInline "context" str) = return $ text str -inlineToConTeXt (RawInline "tex" str) = return $ text str -inlineToConTeXt il@(RawInline _ _) = do - report $ InlineNotRendered il - return empty +inlineToConTeXt il@(RawInline f str) + | f == Format "tex" || f == Format "context" = return $ text str + | otherwise = empty <$ report (InlineNotRendered il) inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr inlineToConTeXt SoftBreak = do wrapText <- gets (writerWrapText . stOptions) @@ -457,7 +453,12 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do clas = if null cls then empty else brackets $ text $ toLabel $ head cls - src' = if isURI src + -- Use / for path separators on Windows; see #4918 + fixPathSeparators = map $ \c -> case c of + '\\' -> '/' + _ -> c + src' = fixPathSeparators $ + if isURI src then src else unEscapeString src return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 53b321c7c..37fec9f0f 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE NoImplicitPrelude #-} {- Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify @@ -35,25 +35,26 @@ import Prelude import Control.Arrow ((***)) import Control.Exception import Control.Monad (when) -import Control.Monad.Trans (MonadIO (liftIO)) import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text, pack) import Data.Typeable -import Foreign.Lua (Lua, ToLuaStack (..), callFunc) -import Foreign.Lua.Api +import Foreign.Lua (Lua, Pushable) import Text.Pandoc.Class (PandocIO) import Text.Pandoc.Definition import Text.Pandoc.Error -import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath) +import Text.Pandoc.Lua.Init (LuaException (LuaException), runPandocLua, + registerScriptPath) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Lua.Util (addValue, dostring') +import Text.Pandoc.Lua.Util (addField, dofileWithTraceback) import Text.Pandoc.Options import Text.Pandoc.Templates import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Writers.Shared +import qualified Foreign.Lua as Lua + attrToMap :: Attr -> M.Map String String attrToMap (id',classes,keyvals) = M.fromList $ ("id", id') @@ -62,41 +63,43 @@ attrToMap (id',classes,keyvals) = M.fromList newtype Stringify a = Stringify a -instance ToLuaStack (Stringify Format) where - push (Stringify (Format f)) = push (map toLower f) +instance Pushable (Stringify Format) where + push (Stringify (Format f)) = Lua.push (map toLower f) -instance ToLuaStack (Stringify [Inline]) where - push (Stringify ils) = push =<< inlineListToCustom ils +instance Pushable (Stringify [Inline]) where + push (Stringify ils) = Lua.push =<< inlineListToCustom ils -instance ToLuaStack (Stringify [Block]) where - push (Stringify blks) = push =<< blockListToCustom blks +instance Pushable (Stringify [Block]) where + push (Stringify blks) = Lua.push =<< blockListToCustom blks -instance ToLuaStack (Stringify MetaValue) where - push (Stringify (MetaMap m)) = push (fmap Stringify m) - push (Stringify (MetaList xs)) = push (map Stringify xs) - push (Stringify (MetaBool x)) = push x - push (Stringify (MetaString s)) = push s - push (Stringify (MetaInlines ils)) = push (Stringify ils) - push (Stringify (MetaBlocks bs)) = push (Stringify bs) +instance Pushable (Stringify MetaValue) where + push (Stringify (MetaMap m)) = Lua.push (fmap Stringify m) + push (Stringify (MetaList xs)) = Lua.push (map Stringify xs) + push (Stringify (MetaBool x)) = Lua.push x + push (Stringify (MetaString s)) = Lua.push s + push (Stringify (MetaInlines ils)) = Lua.push (Stringify ils) + push (Stringify (MetaBlocks bs)) = Lua.push (Stringify bs) -instance ToLuaStack (Stringify Citation) where +instance Pushable (Stringify Citation) where push (Stringify cit) = do - createtable 6 0 - addValue "citationId" $ citationId cit - addValue "citationPrefix" . Stringify $ citationPrefix cit - addValue "citationSuffix" . Stringify $ citationSuffix cit - addValue "citationMode" $ show (citationMode cit) - addValue "citationNoteNum" $ citationNoteNum cit - addValue "citationHash" $ citationHash cit + Lua.createtable 6 0 + addField "citationId" $ citationId cit + addField "citationPrefix" . Stringify $ citationPrefix cit + addField "citationSuffix" . Stringify $ citationSuffix cit + addField "citationMode" $ show (citationMode cit) + addField "citationNoteNum" $ citationNoteNum cit + addField "citationHash" $ citationHash cit -- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the -- associated value. newtype KeyValue a b = KeyValue (a, b) -instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where +instance (Pushable a, Pushable b) => Pushable (KeyValue a b) where push (KeyValue (k, v)) = do - newtable - addValue k v + Lua.newtable + Lua.push k + Lua.push v + Lua.rawset (Lua.nthFromTop 3) data PandocLuaException = PandocLuaException String deriving (Show, Typeable) @@ -106,14 +109,13 @@ instance Exception PandocLuaException -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text writeCustom luaFile opts doc@(Pandoc meta _) = do - luaScript <- liftIO $ UTF8.readFile luaFile res <- runPandocLua $ do registerScriptPath luaFile - stat <- dostring' luaScript + stat <- dofileWithTraceback luaFile -- check for error in lua script (later we'll change the return type -- to handle this more gracefully): - when (stat /= OK) $ - tostring 1 >>= throw . PandocLuaException . UTF8.toString + when (stat /= Lua.OK) $ + Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toString -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom opts doc context <- metaToJSON opts @@ -122,7 +124,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do meta return (rendered, context) let (body, context) = case res of - Left e -> throw (PandocLuaException (show e)) + Left (LuaException msg) -> throw (PandocLuaException msg) Right x -> x case writerTemplate opts of Nothing -> return $ pack body @@ -134,7 +136,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do docToCustom :: WriterOptions -> Pandoc -> Lua String docToCustom opts (Pandoc (Meta metamap) blocks) = do body <- blockListToCustom blocks - callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts) + Lua.callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts) -- | Convert Pandoc block element to Custom. blockToCustom :: Block -- ^ Block element @@ -142,52 +144,55 @@ blockToCustom :: Block -- ^ Block element blockToCustom Null = return "" -blockToCustom (Plain inlines) = callFunc "Plain" (Stringify inlines) +blockToCustom (Plain inlines) = Lua.callFunc "Plain" (Stringify inlines) blockToCustom (Para [Image attr txt (src,tit)]) = - callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr) + Lua.callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr) -blockToCustom (Para inlines) = callFunc "Para" (Stringify inlines) +blockToCustom (Para inlines) = Lua.callFunc "Para" (Stringify inlines) -blockToCustom (LineBlock linesList) = callFunc "LineBlock" (map Stringify linesList) +blockToCustom (LineBlock linesList) = + Lua.callFunc "LineBlock" (map Stringify linesList) blockToCustom (RawBlock format str) = - callFunc "RawBlock" (Stringify format) str + Lua.callFunc "RawBlock" (Stringify format) str -blockToCustom HorizontalRule = callFunc "HorizontalRule" +blockToCustom HorizontalRule = Lua.callFunc "HorizontalRule" blockToCustom (Header level attr inlines) = - callFunc "Header" level (Stringify inlines) (attrToMap attr) + Lua.callFunc "Header" level (Stringify inlines) (attrToMap attr) blockToCustom (CodeBlock attr str) = - callFunc "CodeBlock" str (attrToMap attr) + Lua.callFunc "CodeBlock" str (attrToMap attr) -blockToCustom (BlockQuote blocks) = callFunc "BlockQuote" (Stringify blocks) +blockToCustom (BlockQuote blocks) = + Lua.callFunc "BlockQuote" (Stringify blocks) blockToCustom (Table capt aligns widths headers rows) = let aligns' = map show aligns capt' = Stringify capt headers' = map Stringify headers rows' = map (map Stringify) rows - in callFunc "Table" capt' aligns' widths headers' rows' + in Lua.callFunc "Table" capt' aligns' widths headers' rows' -blockToCustom (BulletList items) = callFunc "BulletList" (map Stringify items) +blockToCustom (BulletList items) = + Lua.callFunc "BulletList" (map Stringify items) blockToCustom (OrderedList (num,sty,delim) items) = - callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) + Lua.callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) blockToCustom (DefinitionList items) = - callFunc "DefinitionList" - (map (KeyValue . (Stringify *** map Stringify)) items) + Lua.callFunc "DefinitionList" + (map (KeyValue . (Stringify *** map Stringify)) items) blockToCustom (Div attr items) = - callFunc "Div" (Stringify items) (attrToMap attr) + Lua.callFunc "Div" (Stringify items) (attrToMap attr) -- | Convert list of Pandoc block elements to Custom. blockListToCustom :: [Block] -- ^ List of block elements -> Lua String blockListToCustom xs = do - blocksep <- callFunc "Blocksep" + blocksep <- Lua.callFunc "Blocksep" bs <- mapM blockToCustom xs return $ mconcat $ intersperse blocksep bs @@ -200,51 +205,51 @@ inlineListToCustom lst = do -- | Convert Pandoc inline element to Custom. inlineToCustom :: Inline -> Lua String -inlineToCustom (Str str) = callFunc "Str" str +inlineToCustom (Str str) = Lua.callFunc "Str" str -inlineToCustom Space = callFunc "Space" +inlineToCustom Space = Lua.callFunc "Space" -inlineToCustom SoftBreak = callFunc "SoftBreak" +inlineToCustom SoftBreak = Lua.callFunc "SoftBreak" -inlineToCustom (Emph lst) = callFunc "Emph" (Stringify lst) +inlineToCustom (Emph lst) = Lua.callFunc "Emph" (Stringify lst) -inlineToCustom (Strong lst) = callFunc "Strong" (Stringify lst) +inlineToCustom (Strong lst) = Lua.callFunc "Strong" (Stringify lst) -inlineToCustom (Strikeout lst) = callFunc "Strikeout" (Stringify lst) +inlineToCustom (Strikeout lst) = Lua.callFunc "Strikeout" (Stringify lst) -inlineToCustom (Superscript lst) = callFunc "Superscript" (Stringify lst) +inlineToCustom (Superscript lst) = Lua.callFunc "Superscript" (Stringify lst) -inlineToCustom (Subscript lst) = callFunc "Subscript" (Stringify lst) +inlineToCustom (Subscript lst) = Lua.callFunc "Subscript" (Stringify lst) -inlineToCustom (SmallCaps lst) = callFunc "SmallCaps" (Stringify lst) +inlineToCustom (SmallCaps lst) = Lua.callFunc "SmallCaps" (Stringify lst) -inlineToCustom (Quoted SingleQuote lst) = callFunc "SingleQuoted" (Stringify lst) +inlineToCustom (Quoted SingleQuote lst) = Lua.callFunc "SingleQuoted" (Stringify lst) -inlineToCustom (Quoted DoubleQuote lst) = callFunc "DoubleQuoted" (Stringify lst) +inlineToCustom (Quoted DoubleQuote lst) = Lua.callFunc "DoubleQuoted" (Stringify lst) -inlineToCustom (Cite cs lst) = callFunc "Cite" (Stringify lst) (map Stringify cs) +inlineToCustom (Cite cs lst) = Lua.callFunc "Cite" (Stringify lst) (map Stringify cs) inlineToCustom (Code attr str) = - callFunc "Code" str (attrToMap attr) + Lua.callFunc "Code" str (attrToMap attr) inlineToCustom (Math DisplayMath str) = - callFunc "DisplayMath" str + Lua.callFunc "DisplayMath" str inlineToCustom (Math InlineMath str) = - callFunc "InlineMath" str + Lua.callFunc "InlineMath" str inlineToCustom (RawInline format str) = - callFunc "RawInline" (Stringify format) str + Lua.callFunc "RawInline" (Stringify format) str -inlineToCustom LineBreak = callFunc "LineBreak" +inlineToCustom LineBreak = Lua.callFunc "LineBreak" inlineToCustom (Link attr txt (src,tit)) = - callFunc "Link" (Stringify txt) src tit (attrToMap attr) + Lua.callFunc "Link" (Stringify txt) src tit (attrToMap attr) inlineToCustom (Image attr alt (src,tit)) = - callFunc "Image" (Stringify alt) src tit (attrToMap attr) + Lua.callFunc "Image" (Stringify alt) src tit (attrToMap attr) -inlineToCustom (Note contents) = callFunc "Note" (Stringify contents) +inlineToCustom (Note contents) = Lua.callFunc "Note" (Stringify contents) inlineToCustom (Span attr items) = - callFunc "Span" (Stringify items) (attrToMap attr) + Lua.callFunc "Span" (Stringify items) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index f6e814095..3306e4f31 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -126,9 +126,10 @@ writeDocbook opts (Pandoc meta blocks) = do defField "mathml" (case writerHTMLMathMethod opts of MathML -> True _ -> False) metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> renderTemplate' tpl context + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1666c0562..524d20fd1 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -66,7 +66,7 @@ import Text.Pandoc.Readers.Docx.StyleMap import Text.Pandoc.Shared hiding (Element) import Text.Pandoc.Walk import Text.Pandoc.Writers.Math -import Text.Pandoc.Writers.Shared (fixDisplayMath, metaValueToInlines) +import Text.Pandoc.Writers.Shared import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML @@ -230,7 +230,7 @@ writeDocx opts doc@(Pandoc meta _) = do let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName) let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName) - -- Get the avaible area (converting the size and the margins to int and + -- Get the available area (converting the size and the margins to int and -- doing the difference let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) <*> ( @@ -266,8 +266,9 @@ writeDocx opts doc@(Pandoc meta _) = do -- parse styledoc for heading styles let styleMaps = getStyleMaps styledoc - let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ - metaValueToInlines <$> lookupMeta "toc-title" meta + let tocTitle = case lookupMetaInlines "toc-title" meta of + [] -> stTocTitle defaultWriterState + ls -> ls let initialSt = defaultWriterState { stStyleMaps = styleMaps @@ -727,7 +728,7 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] -makeTOC opts | writerTableOfContents opts = do +makeTOC opts = do let depth = "1-"++show (writerTOCDepth opts) let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" tocTitle <- gets stTocTitle @@ -751,8 +752,6 @@ makeTOC opts | writerTableOfContents opts = do ) -- w:p ]) ])] -- w:sdt -makeTOC _ = return [] - -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). @@ -761,15 +760,9 @@ writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta let auths = docAuthors meta let dat = docDate meta - let abstract' = case lookupMeta "abstract" meta of - Just (MetaBlocks bs) -> bs - Just (MetaInlines ils) -> [Plain ils] - _ -> [] - let subtitle' = case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> xs - Just (MetaBlocks [Para xs]) -> xs - Just (MetaInlines xs) -> xs - _ -> [] + let abstract' = lookupMetaBlocks "abstract" meta + let subtitle' = lookupMetaInlines "subtitle" meta + let includeTOC = writerTableOfContents opts || lookupMetaBool "toc" meta title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ @@ -801,7 +794,9 @@ writeOpenXML opts (Pandoc meta blocks) = do ] ++ annotation ] comments' <- mapM toComment comments - toc <- makeTOC opts + toc <- if includeTOC + then makeTOC opts + else return [] let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes', comments') @@ -908,9 +903,10 @@ blockToOpenXML' opts (Para lst) | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] | otherwise = do isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False + let displayMathPara = case lst of + [x] -> isDisplayMath x + _ -> False + paraProps <- getParaProps displayMathPara bodyTextStyle <- pStyleM "Body Text" let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f1ff8b482..6099f0223 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -74,6 +74,7 @@ import Text.Printf (printf) import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), add_attrs, lookupAttr, node, onlyElems, parseXML, ppElement, showElement, strContent, unode, unqual) +import Text.Pandoc.XML (escapeStringForXML) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -446,7 +447,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do cpContent <- lift $ writeHtml opts'{ writerVariables = ("coverpage","true"): - ("pagetitle",plainTitle): + ("pagetitle", + escapeStringForXML plainTitle): cssvars True ++ vars } (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- lift $ P.readFileLazy img @@ -459,7 +461,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do -- title page tpContent <- lift $ writeHtml opts'{ writerVariables = ("titlepage","true"): - ("pagetitle",plainTitle): + ("body-type", "frontmatter"): + ("pagetitle", escapeStringForXML plainTitle): cssvars True ++ vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -563,13 +566,28 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do let chapToEntry num (Chapter mbnum bs) = mkEntry ("text/" ++ showChapter num) =<< writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum - , writerVariables = cssvars True ++ vars } - (case bs of - (Header _ _ xs : _) -> + , writerVariables = ("body-type", bodyType) : + cssvars True ++ vars } pdoc + where (pdoc, bodyType) = + case bs of + (Header _ (_,_,kvs) xs : _) -> -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> Pandoc nullMeta bs) + (Pandoc (setMeta "title" + (walk removeNote $ fromList xs) nullMeta) bs, + case lookup "epub:type" kvs of + Nothing -> "bodymatter" + Just x + | x `elem` frontMatterTypes -> "frontmatter" + | x `elem` backMatterTypes -> "backmatter" + | otherwise -> "bodymatter") + _ -> (Pandoc nullMeta bs, "bodymatter") + frontMatterTypes = ["prologue", "abstract", "acknowledgments", + "copyright-page", "dedication", + "foreword", "halftitle", + "introduction", "preface", + "seriespage", "titlepage"] + backMatterTypes = ["afterword", "appendix", "colophon", + "conclusion", "epigraph"] chapterEntries <- zipWithM chapToEntry [1..] chapters @@ -754,7 +772,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do (writeHtmlStringForEPUB version opts{ writerTemplate = Nothing , writerVariables = - ("pagetitle",plainTitle): + ("pagetitle", + escapeStringForXML plainTitle): writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of @@ -782,7 +801,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ "Cover"] | - epubCoverImage metadata /= Nothing + isJust (epubCoverImage metadata) ] ++ [ unode "li" [ unode "a" ! [("href", "#toc") diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index a46011a8f..a139de5cd 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -119,7 +119,7 @@ description meta' = do let as = authors meta' dd <- docdate meta' annotation <- case lookupMeta "abstract" meta' of - Just (MetaBlocks bs) -> (list . el "annotation") <$> cMapM blockToXml bs + Just (MetaBlocks bs) -> (list . el "annotation") <$> cMapM blockToXml (map unPlain bs) _ -> pure mempty let lang = case lookupMeta "lang" meta' of Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] @@ -135,8 +135,9 @@ description meta' = do Just (MetaString s) -> coverimage s _ -> return [] return $ el "description" - [ el "title-info" (genre : (bt ++ annotation ++ as ++ dd ++ lang)) - , el "document-info" (el "program-used" "pandoc" : coverpage) + [ el "title-info" (genre : + (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) + , el "document-info" [el "program-used" "pandoc"] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -398,6 +399,11 @@ plainToPara (Para inlines : rest) = Para inlines : HorizontalRule : plainToPara rest -- HorizontalRule will be converted to <empty-line /> plainToPara (p:rest) = p : plainToPara rest +-- Replace plain text with paragraphs +unPlain :: Block -> Block +unPlain (Plain inlines) = Para inlines +unPlain x = x + -- Simulate increased indentation level. Will not really work -- for multi-line paragraphs. indentPrefix :: String -> Block -> Block diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index a09ad2fda..46f754226 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -50,13 +50,13 @@ import Prelude import Control.Monad.State.Strict import Data.Char (ord, toLower) import Data.List (intercalate, intersperse, isPrefixOf, partition) -import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing) +import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing, mapMaybe) import qualified Data.Set as Set import Data.String (fromString) import Data.Text (Text) import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) -import Network.URI (URI (..), parseURIReference, unEscapeString) +import Network.URI (URI (..), parseURIReference) import Numeric (showHex) import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty)) #if MIN_VERSION_blaze_markup(0,6,3) @@ -75,7 +75,7 @@ import Text.Pandoc.Templates import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.XML (escapeStringForXML, fromEntities) +import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities) #if MIN_VERSION_blaze_markup(0,6,3) #else import Text.Blaze.Internal (preEscapedString, preEscapedText) @@ -206,7 +206,8 @@ writeHtmlString' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return $ renderHtml' body Just tpl -> do -- warn if empty lang @@ -221,16 +222,19 @@ writeHtmlString' st opts d = do lookup "sourcefile" (writerVariables opts) report $ NoTitleElement fallback return $ resetField "pagetitle" fallback context - renderTemplate' tpl $ - defField "body" (renderHtml' body) context' + renderTemplate' tpl + (defField "body" (renderHtml' body) context') writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html writeHtml' st opts d = case writerTemplate opts of Just _ -> preEscapedText <$> writeHtmlString' st opts d - Nothing -> do - (body, _) <- evalStateT (pandocToHtml opts d) st - return body + Nothing + | writerPreferAscii opts + -> preEscapedText <$> writeHtmlString' st opts d + | otherwise -> do + (body, _) <- evalStateT (pandocToHtml opts d) st + return body -- result is (title, authors, date, toc, body, new variables) pandocToHtml :: PandocMonad m @@ -259,7 +263,7 @@ pandocToHtml opts (Pandoc meta blocks) = do st <- get notes <- footnoteSection opts (reverse (stNotes st)) let thebody = blocks' >> notes - let math = case writerHTMLMathMethod opts of + let math = case writerHTMLMathMethod opts of MathJax url | slideVariant /= RevealJsSlides -> -- mathjax is handled via a special plugin in revealjs @@ -273,10 +277,10 @@ pandocToHtml opts (Pandoc meta blocks) = do KaTeX url -> do H.script ! A.src (toValue $ url ++ "katex.min.js") $ mempty - H.script ! - A.src (toValue $ url ++ "contrib/auto-render.min.js") $ mempty + nl opts H.script - "document.addEventListener(\"DOMContentLoaded\", function() {\n renderMathInElement(document.body);\n});" + "document.addEventListener(\"DOMContentLoaded\", function () {\n var mathElements = document.getElementsByClassName(\"math\");\n for (var i = 0; i < mathElements.length; i++) {\n var texText = mathElements[i].firstChild;\n if (mathElements[i].tagName == \"SPAN\") { katex.render(texText.data, mathElements[i], { displayMode: mathElements[i].classList.contains(\"display\"), throwOnError: false } );\n }}});" + nl opts H.link ! A.rel "stylesheet" ! A.href (toValue $ url ++ "katex.min.css") @@ -296,10 +300,11 @@ pandocToHtml opts (Pandoc meta blocks) = do (if stMath st then defField "math" (renderHtml' math) else id) $ - defField "mathjax" - (case writerHTMLMathMethod opts of - MathJax _ -> True - _ -> False) $ + (case writerHTMLMathMethod opts of + MathJax u -> defField "mathjax" True . + defField "mathjaxurl" + (takeWhile (/='?') u) + _ -> defField "mathjax" False) $ defField "quotes" (stQuotes st) $ -- for backwards compatibility we populate toc -- with the contents of the toc, rather than a @@ -460,7 +465,7 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen t <- addAttrs opts attr $ secttag header' return $ - (if slideVariant == RevealJsSlides + (if slideVariant == RevealJsSlides && not (null innerContents) then H5.section else id) $ mconcat $ t : innerContents else if writerSectionDivs opts || slide @@ -576,12 +581,23 @@ toAttrs :: PandocMonad m => [(String, String)] -> StateT WriterState m [Attribute] toAttrs kvs = do html5 <- gets stHtml5 - return $ map (\(x,y) -> - customAttribute - (fromString (if not html5 || x `Set.member` html5Attributes - || "data-" `isPrefixOf` x - then x - else "data-" ++ x)) (toValue y)) kvs + mbEpubVersion <- gets stEPUBVersion + return $ mapMaybe (\(x,y) -> + if html5 + then + if x `Set.member` html5Attributes + || ':' `elem` x -- e.g. epub: namespace + || "data-" `isPrefixOf` x + then Just $ customAttribute (fromString x) (toValue y) + else Just $ customAttribute (fromString ("data-" ++ x)) + (toValue y) + else + if mbEpubVersion == Just EPUB2 && + not (x `Set.member` html4Attributes || + "xml:" `isPrefixOf` x) + then Nothing + else Just $ customAttribute (fromString x) (toValue y)) + kvs attrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] @@ -828,9 +844,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do return $ foldl (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> - do term' <- if null term - then return mempty - else liftM H.dt $ inlineListToHtml opts term + do term' <- liftM H.dt $ inlineListToHtml opts term defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : @@ -1051,8 +1065,8 @@ inlineToHtml opts inline = do DisplayMath -> "\\[" ++ str ++ "\\]" KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" + InlineMath -> str + DisplayMath -> str PlainMath -> do x <- lift (texMathToInlines t str) >>= inlineListToHtml opts let m = H.span ! A.class_ mathClass $ x @@ -1084,10 +1098,7 @@ inlineToHtml opts inline = do in '#' : prefix ++ xs _ -> s let link = H.a ! A.href (toValue s') $ linkText - let attr = if txt == [Str (unEscapeString s)] - then (ident, "uri" : classes, kvs) - else (ident, classes, kvs) - link' <- addAttrs opts attr link + link' <- addAttrs opts (ident, classes, kvs) link return $ if null tit then link' else link' ! A.title (toValue tit) @@ -1422,3 +1433,125 @@ html5Attributes = Set.fromList , "workertype" , "wrap" ] + +html4Attributes :: Set.Set String +html4Attributes = Set.fromList + [ "abbr" + , "accept" + , "accept-charset" + , "accesskey" + , "action" + , "align" + , "alink" + , "alt" + , "archive" + , "axis" + , "background" + , "bgcolor" + , "border" + , "cellpadding" + , "cellspacing" + , "char" + , "charoff" + , "charset" + , "checked" + , "cite" + , "class" + , "classid" + , "clear" + , "code" + , "codebase" + , "codetype" + , "color" + , "cols" + , "colspan" + , "compact" + , "content" + , "coords" + , "data" + , "datetime" + , "declare" + , "defer" + , "dir" + , "disabled" + , "enctype" + , "face" + , "for" + , "frame" + , "frameborder" + , "headers" + , "height" + , "href" + , "hreflang" + , "hspace" + , "http-equiv" + , "id" + , "ismap" + , "label" + , "lang" + , "language" + , "link" + , "longdesc" + , "marginheight" + , "marginwidth" + , "maxlength" + , "media" + , "method" + , "multiple" + , "name" + , "nohref" + , "noresize" + , "noshade" + , "nowrap" + , "object" + , "onblur" + , "onchange" + , "onclick" + , "ondblclick" + , "onfocus" + , "onkeydown" + , "onkeypress" + , "onkeyup" + , "onload" + , "onmousedown" + , "onmousemove" + , "onmouseout" + , "onmouseover" + , "onmouseup" + , "onreset" + , "onselect" + , "onsubmit" + , "onunload" + , "profile" + , "prompt" + , "readonly" + , "rel" + , "rev" + , "rows" + , "rowspan" + , "rules" + , "scheme" + , "scope" + , "scrolling" + , "selected" + , "shape" + , "size" + , "span" + , "src" + , "standby" + , "start" + , "style" + , "summary" + , "tabindex" + , "target" + , "text" + , "title" + , "usemap" + , "valign" + , "value" + , "valuetype" + , "version" + , "vlink" + , "vspace" + , "width" + ] diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 75b8c78dc..80e092b6a 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -45,7 +45,6 @@ import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared type Notes = [[Block]] @@ -208,13 +207,13 @@ blockListToHaddock :: PandocMonad m -> [Block] -- ^ List of block elements -> StateT WriterState m Doc blockListToHaddock opts blocks = - mapM (blockToHaddock opts) blocks >>= return . cat + cat <$> mapM (blockToHaddock opts) blocks -- | Convert list of Pandoc inline elements to haddock. inlineListToHaddock :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToHaddock opts lst = - mapM (inlineToHaddock opts) lst >>= return . cat + cat <$> mapM (inlineToHaddock opts) lst -- | Convert Pandoc inline element to haddock. inlineToHaddock :: PandocMonad m @@ -250,11 +249,10 @@ inlineToHaddock _ (Code _ str) = return $ "@" <> text (escapeString str) <> "@" inlineToHaddock _ (Str str) = return $ text $ escapeString str -inlineToHaddock opts (Math mt str) = do - let adjust x = case mt of - DisplayMath -> cr <> x <> cr - InlineMath -> x - adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts) +inlineToHaddock _ (Math mt str) = + return $ case mt of + DisplayMath -> cr <> "\\[" <> text str <> "\\]" <> cr + InlineMath -> "\\(" <> text str <> "\\)" inlineToHaddock _ il@(RawInline f str) | f == "haddock" = return $ text str | otherwise = do diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 266d58007..ef1e2af0a 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -149,11 +149,12 @@ writeICML opts (Pandoc meta blocks) = do $ defField "charStyles" (render' $ charStylesToDoc st) $ defField "parStyles" (render' $ parStylesToDoc st) $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context --- | Auxilary functions for parStylesToDoc and charStylesToDoc. +-- | Auxiliary functions for parStylesToDoc and charStylesToDoc. contains :: String -> (String, (String, String)) -> [(String, String)] contains s rule = [snd rule | (fst rule) `isInfixOf` s] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index fb3236bd9..4e78a4cce 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -102,7 +102,8 @@ docToJATS opts (Pandoc meta blocks) = do $ defField "mathml" (case writerHTMLMathMethod opts of MathML -> True _ -> False) metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -344,7 +345,7 @@ inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str inlineToJATS opts (Emph lst) = inTagsSimple "italic" <$> inlinesToJATS opts lst inlineToJATS opts (Strong lst) = - inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst + inTagsSimple "bold" <$> inlinesToJATS opts lst inlineToJATS opts (Strikeout lst) = inTagsSimple "strike" <$> inlinesToJATS opts lst inlineToJATS opts (Superscript lst) = @@ -352,8 +353,7 @@ inlineToJATS opts (Superscript lst) = inlineToJATS opts (Subscript lst) = inTagsSimple "sub" <$> inlinesToJATS opts lst inlineToJATS opts (SmallCaps lst) = - inTags False "sc" [("role", "smallcaps")] <$> - inlinesToJATS opts lst + inTagsSimple "sc" <$> inlinesToJATS opts lst inlineToJATS opts (Quoted SingleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '‘' <> contents <> char '’' diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2904bec06..c1b5d0fa4 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,8 +42,9 @@ import Data.Aeson (FromJSON, object, (.=)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, toLower) import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, - stripPrefix, (\\)) + stripPrefix, (\\), uncons) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) +import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) @@ -63,6 +64,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import qualified Text.Parsec as P import Text.Printf (printf) +import qualified Data.Text.Normalize as Normalize data WriterState = WriterState { stInNote :: Bool -- true if we're in a note @@ -176,9 +178,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{stCsquotes = True} let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) - else case last blocks' of - Header 1 _ il -> (init blocks', il) - _ -> (blocks', []) + else case reverse blocks' of + Header 1 _ il : _ -> (init blocks', il) + _ -> (blocks', []) beamer <- gets stBeamer blocks''' <- if beamer then toSlides blocks'' @@ -248,7 +250,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "biblatex" True _ -> id) $ defField "colorlinks" (any hasStringValue - ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ + ["citecolor", "urlcolor", "linkcolor", "toccolor", + "filecolor"]) $ (if null dirs then id else defField "dir" ("ltr" :: String)) $ @@ -317,46 +320,110 @@ data StringContext = TextString -- escape things as needed for LaTeX stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String -stringToLaTeX _ [] = return "" -stringToLaTeX ctx (x:xs) = do +stringToLaTeX context zs = do opts <- gets stOptions - rest <- stringToLaTeX ctx xs - let ligatures = isEnabled Ext_smart opts && ctx == TextString - let isUrl = ctx == URLString - return $ + go opts context $ + if writerPreferAscii opts + then T.unpack $ Normalize.normalize Normalize.NFD $ T.pack zs + else zs + where + go _ _ [] = return "" + go opts ctx (x:xs) = do + let ligatures = isEnabled Ext_smart opts && ctx == TextString + let isUrl = ctx == URLString + let mbAccentCmd = + if writerPreferAscii opts && ctx == TextString + then uncons xs >>= \(c,_) -> M.lookup c accents + else Nothing + let emits s = + case mbAccentCmd of + Just cmd -> ((cmd ++ "{" ++ s ++ "}") ++) + <$> go opts ctx (drop 1 xs) -- drop combining accent + Nothing -> (s++) <$> go opts ctx xs + let emitc c = + case mbAccentCmd of + Just cmd -> ((cmd ++ "{" ++ [c] ++ "}") ++) + <$> go opts ctx (drop 1 xs) -- drop combining accent + Nothing -> (c:) <$> go opts ctx xs case x of - '{' -> "\\{" ++ rest - '}' -> "\\}" ++ rest - '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest - '$' | not isUrl -> "\\$" ++ rest - '%' -> "\\%" ++ rest - '&' -> "\\&" ++ rest - '_' | not isUrl -> "\\_" ++ rest - '#' -> "\\#" ++ rest - '-' | not isUrl -> case xs of - -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-\\/" ++ rest - _ -> '-' : rest - '~' | not isUrl -> "\\textasciitilde{}" ++ rest - '^' -> "\\^{}" ++ rest - '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows - | otherwise -> "\\textbackslash{}" ++ rest - '|' | not isUrl -> "\\textbar{}" ++ rest - '<' -> "\\textless{}" ++ rest - '>' -> "\\textgreater{}" ++ rest - '[' -> "{[}" ++ rest -- to avoid interpretation as - ']' -> "{]}" ++ rest -- optional arguments - '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest - '\160' -> "~" ++ rest - '\x202F' -> "\\," ++ rest - '\x2026' -> "\\ldots{}" ++ rest - '\x2018' | ligatures -> "`" ++ rest - '\x2019' | ligatures -> "'" ++ rest - '\x201C' | ligatures -> "``" ++ rest - '\x201D' | ligatures -> "''" ++ rest - '\x2014' | ligatures -> "---" ++ rest - '\x2013' | ligatures -> "--" ++ rest - _ -> x : rest + '{' -> emits "\\{" + '}' -> emits "\\}" + '`' | ctx == CodeString -> emits "\\textasciigrave{}" + '$' | not isUrl -> emits "\\$" + '%' -> emits "\\%" + '&' -> emits "\\&" + '_' | not isUrl -> emits "\\_" + '#' -> emits "\\#" + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> emits "-\\/" + _ -> emitc '-' + '~' | not isUrl -> emits "\\textasciitilde{}" + '^' -> emits "\\^{}" + '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows + | otherwise -> emits "\\textbackslash{}" + '|' | not isUrl -> emits "\\textbar{}" + '<' -> emits "\\textless{}" + '>' -> emits "\\textgreater{}" + '[' -> emits "{[}" -- to avoid interpretation as + ']' -> emits "{]}" -- optional arguments + '\'' | ctx == CodeString -> emits "\\textquotesingle{}" + '\160' -> emits "~" + '\x202F' -> emits "\\," + '\x2026' -> emits "\\ldots{}" + '\x2018' | ligatures -> emits "`" + '\x2019' | ligatures -> emits "'" + '\x201C' | ligatures -> emits "``" + '\x201D' | ligatures -> emits "''" + '\x2014' | ligatures -> emits "---" + '\x2013' | ligatures -> emits "--" + _ | writerPreferAscii opts + -> case x of + 'ı' -> emits "\\i " + 'ȷ' -> emits "\\j " + 'å' -> emits "\\aa " + 'Å' -> emits "\\AA " + 'ß' -> emits "\\ss " + 'ø' -> emits "\\o " + 'Ø' -> emits "\\O " + 'Ł' -> emits "\\L " + 'ł' -> emits "\\l " + 'æ' -> emits "\\ae " + 'Æ' -> emits "\\AE " + 'œ' -> emits "\\oe " + 'Œ' -> emits "\\OE " + '£' -> emits "\\pounds " + '€' -> emits "\\euro " + '©' -> emits "\\copyright " + _ -> emitc x + | otherwise -> emitc x + +accents :: M.Map Char String +accents = M.fromList + [ ('\779' , "\\H") + , ('\768' , "\\`") + , ('\769' , "\\'") + , ('\770' , "\\^") + , ('\771' , "\\~") + , ('\776' , "\\\"") + , ('\775' , "\\.") + , ('\772' , "\\=") + , ('\781' , "\\|") + , ('\817' , "\\b") + , ('\807' , "\\c") + , ('\783' , "\\G") + , ('\777' , "\\h") + , ('\803' , "\\d") + , ('\785' , "\\f") + , ('\778' , "\\r") + , ('\865' , "\\t") + , ('\782' , "\\U") + , ('\780' , "\\v") + , ('\774' , "\\u") + , ('\808' , "\\k") + , ('\785' , "\\newtie") + , ('\8413', "\\textcircled") + ] toLabel :: PandocMonad m => String -> LW m String toLabel z = go `fmap` stringToLaTeX URLString z @@ -402,7 +469,8 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) not (null $ query hasCodeBlock elts ++ query hasCode elts) let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile", "b", "c", "t", "environment", - "label", "plain", "shrink", "standout"] + "label", "plain", "shrink", "standout", + "noframenumbering"] let optionslist = ["fragile" | fragile && isNothing (lookup "fragile" kvs)] ++ [k | k <- classes, k `elem` frameoptions] ++ [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] @@ -487,7 +555,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) then \contents -> let fromPct xs = case reverse xs of - '%':ds -> '0':'.': reverse ds + '%':ds -> showFl (read (reverse ds) / 100 :: Double) _ -> xs w = maybe "0.48" fromPct (lookup "width" kvs) in inCmd "begin" "column" <> @@ -517,25 +585,15 @@ blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do - inNote <- gets stInNote - inMinipage <- gets stInMinipage - modify $ \st -> st{ stInMinipage = True, stNotes = [] } - capt <- inlineListToLaTeX txt - notes <- gets stNotes - modify $ \st -> st{ stInMinipage = False, stNotes = [] } - - -- We can't have footnotes in the list of figures, so remove them: - captForLof <- if null notes - then return empty - else brackets <$> inlineListToLaTeX (walk deNote txt) - img <- inlineToLaTeX (Image attr txt (src,tit)) - let footnotes = notesToLaTeX notes + (capt, captForLof, footnotes) <- getCaption txt lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab + img <- inlineToLaTeX (Image attr txt (src,tit)) innards <- hypertarget True ident $ "\\centering" $$ img $$ caption <> cr let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" - return $ if inNote || inMinipage + st <- get + return $ if stInNote st || stInMinipage st -- can't have figures in notes or minipage (here, table cell) -- http://www.tex.ac.uk/FAQ-ouparmd.html then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" @@ -714,11 +772,11 @@ blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = False} return hdr blockToLaTeX (Table caption aligns widths heads rows) = do + (captionText, captForLof, footnotes) <- getCaption caption let toHeaders hs = do contents <- tableRowToLaTeX True aligns widths hs return ("\\toprule" $$ contents $$ "\\midrule") let removeNote (Note _) = Span ("", [], []) [] removeNote x = x - captionText <- inlineListToLaTeX caption firsthead <- if isEmpty captionText || all null heads then return empty else ($$ text "\\endfirsthead") <$> toHeaders heads @@ -730,8 +788,8 @@ blockToLaTeX (Table caption aligns widths heads rows) = do else walk removeNote heads) let capt = if isEmpty captionText then empty - else text "\\caption" <> - braces captionText <> "\\tabularnewline" + else "\\caption" <> captForLof <> braces captionText + <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concatMap toColDescriptor aligns modify $ \s -> s{ stTable = True } @@ -745,6 +803,21 @@ blockToLaTeX (Table caption aligns widths heads rows) = do $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" + $$ footnotes + +getCaption :: PandocMonad m => [Inline] -> LW m (Doc, Doc, Doc) +getCaption txt = do + inMinipage <- gets stInMinipage + modify $ \st -> st{ stInMinipage = True, stNotes = [] } + capt <- inlineListToLaTeX txt + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = inMinipage, stNotes = [] } + -- We can't have footnotes in the list of figures/tables, so remove them: + captForLof <- if null notes + then return empty + else brackets <$> inlineListToLaTeX (walk deNote txt) + let footnotes = notesToLaTeX notes + return (capt, captForLof, footnotes) toColDescriptor :: Alignment -> String toColDescriptor align = @@ -863,9 +936,11 @@ defListItemToLaTeX (term, defs) = do else term' def' <- liftM vsep $ mapM blockListToLaTeX defs return $ case defs of - ((Header{} : _) : _) -> + ((Header{} : _) : _) -> + "\\item" <> brackets term'' <> " ~ " $$ def' + ((CodeBlock{} : _) : _) -> -- see #4662 "\\item" <> brackets term'' <> " ~ " $$ def' - _ -> + _ -> "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 912231a88..81fa38bd7 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -107,7 +107,8 @@ pandocToMan opts (Pandoc meta blocks) = do $ defField "has-tables" hasTables $ defField "hyphenate" True $ defField "pandoc-version" pandocVersion metadata - case writerTemplate opts of + (if writerPreferAscii opts then groffEscape else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -152,32 +153,6 @@ escapeCode = intercalate "\n" . map escapeLine . lines where -- line. groff/troff treats the line-ending period differently. -- See http://code.google.com/p/pandoc/issues/detail?id=148. --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence [] = ([],[]) -breakSentence xs = - let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True - isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False - (as, bs) = break isSentenceEndInline xs - in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) - (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - -- | Convert Pandoc block element to man. blockToMan :: PandocMonad m => WriterOptions -- ^ Options @@ -325,11 +300,11 @@ blockListToMan :: PandocMonad m -> [Block] -- ^ List of block elements -> StateT WriterState m Doc blockListToMan opts blocks = - mapM (blockToMan opts) blocks >>= (return . vcat) + vcat <$> mapM (blockToMan opts) blocks -- | Convert list of Pandoc inline elements to man. inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc -inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) +inlineListToMan opts lst = hcat <$> mapM (inlineToMan opts) lst -- | Convert Pandoc inline element to man. inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 075858e5e..9a4acb59d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum) +import Data.Char (isPunctuation, isSpace, isAlphaNum) import Data.Default import qualified Data.HashMap.Strict as H import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) @@ -50,7 +50,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Vector as V -import Data.Yaml (Value (Array, Bool, Number, Object, String)) +import Data.Aeson (Value (Array, Bool, Number, Object, String)) import Network.HTTP (urlEncode) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class (PandocMonad, report) @@ -298,7 +298,8 @@ escapeString opts (c:cs) = '\\':c:escapeString opts cs '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':escapeString opts cs '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs - '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs + '~' | isEnabled Ext_subscript opts || + isEnabled Ext_strikeout opts -> '\\':'~':escapeString opts cs '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs @@ -452,8 +453,14 @@ blockToMarkdown' opts (Plain inlines) = do | otherwise -> contents return $ contents' <> cr -- title beginning with fig: indicates figure -blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToMarkdown opts (Para [Image attr alt (src,tit)]) +blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) + | isEnabled Ext_raw_html opts && + not (isEnabled Ext_link_attributes opts) && + attr /= nullAttr = -- use raw HTML + (text . T.unpack . T.strip) <$> + writeHtml5String opts{ writerTemplate = Nothing } + (Pandoc nullMeta [Para [Image attr alt (src,"fig:" ++ tit)]]) + | otherwise = blockToMarkdown opts (Para [Image attr alt (src,tit)]) blockToMarkdown' opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown' opts (LineBlock lns) = @@ -619,7 +626,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do (all null headers) aligns' widths' headers rows | isEnabled Ext_raw_html opts -> fmap (id,) $ (text . T.unpack) <$> - (writeHtml5String def $ Pandoc nullMeta [t]) + (writeHtml5String opts{ writerTemplate = Nothing } $ Pandoc nullMeta [t]) | hasSimpleCells && isEnabled Ext_pipe_tables opts -> do rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers @@ -976,6 +983,11 @@ isRight (Left _) = False -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc +inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = do + case lookup "data-emoji" kvs of + Just emojiname | isEnabled Ext_emoji opts -> + return $ ":" <> text emojiname <> ":" + _ -> inlineToMarkdown opts (Str s) inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain contents <- inlineListToMarkdown opts ils @@ -1172,7 +1184,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML (text . T.unpack . T.strip) <$> - writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) + writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1212,7 +1224,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML (text . T.unpack . T.strip) <$> - writeHtml5String def (Pandoc nullMeta [Plain [img]]) + writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1237,33 +1249,6 @@ makeMathPlainer = walk go go (Emph xs) = Span nullAttr xs go x = x -toSuperscript :: Char -> Maybe Char -toSuperscript '1' = Just '\x00B9' -toSuperscript '2' = Just '\x00B2' -toSuperscript '3' = Just '\x00B3' -toSuperscript '+' = Just '\x207A' -toSuperscript '-' = Just '\x207B' -toSuperscript '=' = Just '\x207C' -toSuperscript '(' = Just '\x207D' -toSuperscript ')' = Just '\x207E' -toSuperscript c - | c >= '0' && c <= '9' = - Just $ chr (0x2070 + (ord c - 48)) - | isSpace c = Just c - | otherwise = Nothing - -toSubscript :: Char -> Maybe Char -toSubscript '+' = Just '\x208A' -toSubscript '-' = Just '\x208B' -toSubscript '=' = Just '\x208C' -toSubscript '(' = Just '\x208D' -toSubscript ')' = Just '\x208E' -toSubscript c - | c >= '0' && c <= '9' = - Just $ chr (0x2080 + (ord c - 48)) - | isSpace c = Just c - | otherwise = Nothing - lineBreakToSpace :: Inline -> Inline lineBreakToSpace LineBreak = Space lineBreakToSpace SoftBreak = Space diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs index 99d17d594..61decf2df 100644 --- a/src/Text/Pandoc/Writers/Math.hs +++ b/src/Text/Pandoc/Writers/Math.hs @@ -55,4 +55,4 @@ defaultMathJaxURL :: String defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/" defaultKaTeXURL :: String -defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.8.3/" +defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.9.0/" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index df50028a0..666853a3c 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -313,6 +313,7 @@ tableCellToMediaWiki headless rownum (alignment, width, bs) = do let sep = case bs of [Plain _] -> " " [Para _] -> " " + [] -> "" _ -> "\n" return $ marker ++ attr ++ sep ++ trimr contents diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 16a66c85b..9a35a9693 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -127,7 +127,8 @@ pandocToMs opts (Pandoc meta blocks) = do $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ defField "highlighting-macros" highlightingMacros metadata - case writerTemplate opts of + (if writerPreferAscii opts then groffEscape else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -188,32 +189,6 @@ escapeCode = intercalate "\n" . map escapeLine . lines -- line. groff/troff treats the line-ending period differently. -- See http://code.google.com/p/pandoc/issues/detail?id=148. --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence [] = ([],[]) -breakSentence xs = - let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True - isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False - (as, bs) = break isSentenceEndInline xs - in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) - (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - blockToMs :: PandocMonad m => WriterOptions -- ^ Options -> Block -- ^ Block element @@ -434,7 +409,7 @@ blockListToMs :: PandocMonad m -> [Block] -- ^ List of block elements -> MS m Doc blockListToMs opts blocks = - mapM (blockToMs opts) blocks >>= (return . vcat) + vcat <$> mapM (blockToMs opts) blocks -- | Convert list of Pandoc inline elements to ms. inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 3681fcc0d..18aebc364 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -46,7 +46,7 @@ module Text.Pandoc.Writers.Muse (writeMuse) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (isSpace, isDigit, isAsciiUpper, isAsciiLower) +import Data.Char (isSpace, isAlphaNum, isDigit, isAsciiUpper, isAsciiLower) import Data.Default import Data.Text (Text) import Data.List (intersperse, transpose, isInfixOf) @@ -70,20 +70,24 @@ data WriterEnv = WriterEnv { envOptions :: WriterOptions , envTopLevel :: Bool , envInsideBlock :: Bool - , envInlineStart :: Bool + , envInlineStart :: Bool -- ^ True if there is only whitespace since last newline , envInsideLinkDescription :: Bool -- ^ Escape ] if True - , envAfterSpace :: Bool + , envAfterSpace :: Bool -- ^ There is whitespace (not just newline) before , envOneLine :: Bool -- ^ True if newlines are not allowed + , envInsideAsterisks :: Bool -- ^ True if outer element is emphasis with asterisks + , envNearAsterisks :: Bool -- ^ Rendering inline near asterisks } data WriterState = WriterState { stNotes :: Notes , stIds :: Set.Set String + , stUseTags :: Bool -- ^ Use tags for emphasis, for example because previous character is a letter } instance Default WriterState where def = WriterState { stNotes = [] , stIds = Set.empty + , stUseTags = False } evalMuse :: PandocMonad m => Muse m a -> WriterEnv -> WriterState -> m a @@ -103,6 +107,8 @@ writeMuse opts document = , envInsideLinkDescription = False , envAfterSpace = False , envOneLine = False + , envInsideAsterisks = False + , envNearAsterisks = False } -- | Return Muse representation of document. @@ -212,6 +218,7 @@ blockToMuse (BulletList items) = do => [Block] -> Muse m Doc bulletListItemToMuse item = do + modify $ \st -> st { stUseTags = False } contents <- blockListToMuse item return $ hang 2 "- " contents blockToMuse (DefinitionList items) = do @@ -223,16 +230,18 @@ blockToMuse (DefinitionList items) = do => ([Inline], [[Block]]) -> Muse m Doc definitionListItemToMuse (label, defs) = do + modify $ \st -> st { stUseTags = False } label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label contents <- vcat <$> mapM descriptionToMuse defs let ind = offset label' - return $ hang ind label' contents + return $ hang ind (nowrap label') contents descriptionToMuse :: PandocMonad m => [Block] -> Muse m Doc descriptionToMuse desc = hang 4 " :: " <$> blockListToMuse desc blockToMuse (Header level (ident,_,_) inlines) = do opts <- asks envOptions + topLevel <- asks envTopLevel contents <- local (\env -> env { envOneLine = True }) $ inlineListToMuse' inlines ids <- gets stIds let autoId = uniqueIdent inlines ids @@ -241,8 +250,8 @@ blockToMuse (Header level (ident,_,_) inlines) = do let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty else "#" <> text ident <> cr - let header' = text $ replicate level '*' - return $ blankline <> attr' $$ nowrap (header' <> space <> contents) <> blankline + let header' = if topLevel then (text $ replicate level '*') <> space else mempty + return $ blankline <> attr' $$ nowrap (header' <> contents) <> blankline -- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline blockToMuse (Table caption _ _ headers rows) = do @@ -284,7 +293,11 @@ noteToMuse :: PandocMonad m -> [Block] -> Muse m Doc noteToMuse num note = - hang (length marker) (text marker) <$> blockListToMuse note + hang (length marker) (text marker) <$> + (local (\env -> env { envInsideBlock = True + , envInlineStart = True + , envAfterSpace = True + }) $ blockListToMuse note) where marker = "[" ++ show num ++ "] " @@ -295,6 +308,12 @@ escapeString s = substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++ "</verbatim>" +-- | Replace newlines with spaces +replaceNewlines :: String -> String +replaceNewlines ('\n':xs) = ' ':replaceNewlines xs +replaceNewlines (x:xs) = x:replaceNewlines xs +replaceNewlines [] = [] + startsWithMarker :: (Char -> Bool) -> String -> Bool startsWithMarker f (' ':xs) = startsWithMarker f xs startsWithMarker f (x:xs) = @@ -321,16 +340,28 @@ containsFootnotes = p s (_:xs) = p xs s [] = False -conditionalEscapeString :: Bool -> String -> String -conditionalEscapeString isInsideLinkDescription s = - if any (`elem` ("#*<=|" :: String)) s || - "::" `isInfixOf` s || - "~~" `isInfixOf` s || - "[[" `isInfixOf` s || - ("]" `isInfixOf` s && isInsideLinkDescription) || - containsFootnotes s - then escapeString s - else s +-- | Return True if string should be escaped with <verbatim> tags +shouldEscapeString :: PandocMonad m + => String + -> Muse m Bool +shouldEscapeString s = do + insideLink <- asks envInsideLinkDescription + return $ null s || + any (`elem` ("#*<=|" :: String)) s || + "::" `isInfixOf` s || + "~~" `isInfixOf` s || + "[[" `isInfixOf` s || + ("]" `isInfixOf` s && insideLink) || + containsFootnotes s + +conditionalEscapeString :: PandocMonad m + => String + -> Muse m String +conditionalEscapeString s = do + shouldEscape <- shouldEscapeString s + return $ if shouldEscape + then escapeString s + else s -- Expand Math and Cite before normalizing inline list preprocessInlineList :: PandocMonad m @@ -389,6 +420,19 @@ fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest fixNotes (x:xs) = x : fixNotes xs +startsWithSpace :: [Inline] -> Bool +startsWithSpace (Space:_) = True +startsWithSpace (SoftBreak:_) = True +startsWithSpace (Str s:_) = stringStartsWithSpace s +startsWithSpace _ = False + +endsWithSpace :: [Inline] -> Bool +endsWithSpace [Space] = True +endsWithSpace [SoftBreak] = True +endsWithSpace [Str s] = stringStartsWithSpace $ reverse s +endsWithSpace (_:xs) = endsWithSpace xs +endsWithSpace [] = False + urlEscapeBrackets :: String -> String urlEscapeBrackets (']':xs) = '%':'5':'D':urlEscapeBrackets xs urlEscapeBrackets (x:xs) = x:urlEscapeBrackets xs @@ -397,22 +441,33 @@ urlEscapeBrackets [] = [] isHorizontalRule :: String -> Bool isHorizontalRule s = length s >= 4 && all (== '-') s -startsWithSpace :: String -> Bool -startsWithSpace (x:_) = isSpace x -startsWithSpace [] = False +stringStartsWithSpace :: String -> Bool +stringStartsWithSpace (x:_) = isSpace x +stringStartsWithSpace "" = False fixOrEscape :: Bool -> Inline -> Bool fixOrEscape sp (Str "-") = sp -fixOrEscape sp (Str ";") = not sp -fixOrEscape _ (Str ">") = True +fixOrEscape sp (Str s@('-':x:_)) = (sp && isSpace x) || isHorizontalRule s +fixOrEscape sp (Str (";")) = not sp +fixOrEscape sp (Str (';':x:_)) = not sp && isSpace x +fixOrEscape _ (Str (">")) = True +fixOrEscape _ (Str ('>':x:_)) = isSpace x fixOrEscape sp (Str s) = (sp && (startsWithMarker isDigit s || startsWithMarker isAsciiLower s || startsWithMarker isAsciiUpper s)) - || isHorizontalRule s || startsWithSpace s + || stringStartsWithSpace s fixOrEscape _ Space = True fixOrEscape _ SoftBreak = True fixOrEscape _ _ = False +inlineListStartsWithAlnum :: PandocMonad m + => [Inline] + -> Muse m Bool +inlineListStartsWithAlnum (Str s:_) = do + esc <- shouldEscapeString s + return $ esc || isAlphaNum (head s) +inlineListStartsWithAlnum _ = return False + -- | Convert list of Pandoc inline elements to Muse renderInlineList :: PandocMonad m => [Inline] @@ -424,86 +479,159 @@ renderInlineList (x:xs) = do start <- asks envInlineStart afterSpace <- asks envAfterSpace topLevel <- asks envTopLevel - r <- inlineToMuse x + insideAsterisks <- asks envInsideAsterisks + nearAsterisks <- asks envNearAsterisks + useTags <- gets stUseTags + alnumNext <- inlineListStartsWithAlnum xs + let newUseTags = useTags || alnumNext + modify $ \st -> st { stUseTags = newUseTags } + + r <- local (\env -> env { envInlineStart = False + , envInsideAsterisks = False + , envNearAsterisks = nearAsterisks || (null xs && insideAsterisks) + }) $ inlineToMuse x opts <- asks envOptions let isNewline = (x == SoftBreak && writerWrapText opts == WrapPreserve) || x == LineBreak lst' <- local (\env -> env { envInlineStart = isNewline , envAfterSpace = x == Space || (not topLevel && isNewline) + , envNearAsterisks = False }) $ renderInlineList xs if start && fixOrEscape afterSpace x then pure (text "<verbatim></verbatim>" <> r <> lst') else pure (r <> lst') -- | Normalize and convert list of Pandoc inline elements to Muse. -inlineListToMuse'' :: PandocMonad m - => Bool - -> [Inline] - -> Muse m Doc -inlineListToMuse'' start lst = do - lst' <- (normalizeInlineList . fixNotes) <$> preprocessInlineList (map (removeKeyValues . replaceSmallCaps) lst) - topLevel <- asks envTopLevel - afterSpace <- asks envAfterSpace - local (\env -> env { envInlineStart = start - , envAfterSpace = afterSpace || (start && not topLevel) - }) $ renderInlineList lst' +inlineListToMuse :: PandocMonad m + => [Inline] + -> Muse m Doc +inlineListToMuse lst = do + lst' <- normalizeInlineList . fixNotes <$> preprocessInlineList (map (removeKeyValues . replaceSmallCaps) lst) + insideAsterisks <- asks envInsideAsterisks + modify $ \st -> st { stUseTags = False } -- Previous character is likely a '>' or some other markup + local (\env -> env { envNearAsterisks = insideAsterisks }) $ renderInlineList lst' inlineListToMuse' :: PandocMonad m => [Inline] -> Muse m Doc -inlineListToMuse' = inlineListToMuse'' True - -inlineListToMuse :: PandocMonad m => [Inline] -> Muse m Doc -inlineListToMuse = inlineListToMuse'' False +inlineListToMuse' lst = do + topLevel <- asks envTopLevel + afterSpace <- asks envAfterSpace + local (\env -> env { envInlineStart = True + , envAfterSpace = afterSpace || not topLevel + }) $ inlineListToMuse lst -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m => Inline -> Muse m Doc inlineToMuse (Str str) = do - insideLink <- asks envInsideLinkDescription - return $ text $ conditionalEscapeString insideLink str + escapedStr <- conditionalEscapeString $ replaceNewlines str + let useTags = isAlphaNum $ last escapedStr -- escapedStr is never empty because empty strings are escaped + modify $ \st -> st { stUseTags = useTags } + return $ text escapedStr +inlineToMuse (Emph [Strong lst]) = do + useTags <- gets stUseTags + let lst' = normalizeInlineList lst + if useTags + then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = False } + return $ "<em>**" <> contents <> "**</em>" + else if null lst' || startsWithSpace lst' || endsWithSpace lst' + then do + contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "*<strong>" <> contents <> "</strong>*" + else do + contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "***" <> contents <> "***" inlineToMuse (Emph lst) = do - contents <- inlineListToMuse lst - return $ "<em>" <> contents <> "</em>" + useTags <- gets stUseTags + let lst' = normalizeInlineList lst + if useTags || null lst' || startsWithSpace lst' || endsWithSpace lst' + then do contents <- inlineListToMuse lst' + return $ "<em>" <> contents <> "</em>" + else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "*" <> contents <> "*" +inlineToMuse (Strong [Emph lst]) = do + useTags <- gets stUseTags + let lst' = normalizeInlineList lst + if useTags + then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = False } + return $ "<strong>*" <> contents <> "*</strong>" + else if null lst' || startsWithSpace lst' || endsWithSpace lst' + then do + contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "**<em>" <> contents <> "</em>**" + else do + contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "***" <> contents <> "***" inlineToMuse (Strong lst) = do - contents <- inlineListToMuse lst - return $ "<strong>" <> contents <> "</strong>" + useTags <- gets stUseTags + let lst' = normalizeInlineList lst + if useTags || null lst' || startsWithSpace lst' || endsWithSpace lst' + then do contents <- inlineListToMuse lst' + modify $ \st -> st { stUseTags = False } + return $ "<strong>" <> contents <> "</strong>" + else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst' + modify $ \st -> st { stUseTags = True } + return $ "**" <> contents <> "**" inlineToMuse (Strikeout lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "<del>" <> contents <> "</del>" inlineToMuse (Superscript lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "<sup>" <> contents <> "</sup>" inlineToMuse (Subscript lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "<sub>" <> contents <> "</sub>" inlineToMuse SmallCaps {} = fail "SmallCaps should be expanded before normalization" inlineToMuse (Quoted SingleQuote lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "‘" <> contents <> "’" inlineToMuse (Quoted DoubleQuote lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "“" <> contents <> "”" inlineToMuse Cite {} = fail "Citations should be expanded before normalization" -inlineToMuse (Code _ str) = return $ - "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" +inlineToMuse (Code _ str) = do + useTags <- gets stUseTags + modify $ \st -> st { stUseTags = False } + return $ if useTags || null str || '=' `elem` str || isSpace (head str) || isSpace (last str) + then "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" + else "=" <> text str <> "=" inlineToMuse Math{} = fail "Math should be expanded before normalization" -inlineToMuse (RawInline (Format f) str) = +inlineToMuse (RawInline (Format f) str) = do + modify $ \st -> st { stUseTags = False } return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>" inlineToMuse LineBreak = do oneline <- asks envOneLine + modify $ \st -> st { stUseTags = False } return $ if oneline then "<br>" else "<br>" <> cr -inlineToMuse Space = return space +inlineToMuse Space = do + modify $ \st -> st { stUseTags = False } + return space inlineToMuse SoftBreak = do oneline <- asks envOneLine wrapText <- asks $ writerWrapText . envOptions + modify $ \st -> st { stUseTags = False } return $ if not oneline && wrapText == WrapPreserve then cr else space inlineToMuse (Link _ txt (src, _)) = case txt of - [Str x] | escapeURI x == src -> + [Str x] | escapeURI x == src -> do + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (escapeLink x) <> "]]" _ -> do contents <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse txt + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" where escapeLink lnk = if isImageUrl lnk then "URL:" ++ urlEscapeBrackets lnk else urlEscapeBrackets lnk -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el @@ -514,11 +642,12 @@ inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) = inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do opts <- asks envOptions alt <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse inlines - let title' = if null title - then if null inlines - then "" - else "[" <> alt <> "]" - else "[" <> text (conditionalEscapeString True title) <> "]" + title' <- if null title + then if null inlines + then return "" + else return $ "[" <> alt <> "]" + else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeString title + return $ "[" <> text s <> "]" let width = case dimension Width attr of Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer) _ -> "" @@ -528,11 +657,14 @@ inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do let rightalign = if "align-right" `elem` classes then " r" else "" + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (urlEscapeBrackets source ++ width ++ leftalign ++ rightalign) <> "]" <> title' <> "]" inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes - modify $ \st -> st { stNotes = contents:notes } + modify $ \st -> st { stNotes = contents:notes + , stUseTags = False + } let ref = show $ length notes + 1 return $ "[" <> text ref <> "]" inlineToMuse (Span (anchor,names,_) inlines) = do @@ -540,6 +672,7 @@ inlineToMuse (Span (anchor,names,_) inlines) = do let anchorDoc = if null anchor then mempty else text ('#':anchor) <> space + modify $ \st -> st { stUseTags = False } return $ anchorDoc <> (if null inlines && not (null anchor) then mempty else (if null names diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 7aecb3da5..1c9481630 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -189,8 +189,8 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError let dims = case (getDim Width, getDim Height) of (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent p), Nothing) -> [("width", show w), ("height", show (p / ratio) ++ "%")] - (Nothing, Just h@(Percent p)) -> [("width", show (p * ratio) ++ "%"), ("height", show h)] + (Just w@(Percent _), Nothing) -> [("rel-width", show w),("rel-height", "scale"),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] + (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", show h),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 6c48046a2..716c5cbad 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -62,7 +62,8 @@ writeOPML opts (Pandoc meta blocks) = do meta' main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements let context = defField "body" main metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 514327e9a..d9f0a8e44 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -39,17 +39,20 @@ import Control.Monad.State.Strict hiding (when) import Data.Char (chr) import Data.List (sortBy) import qualified Data.Map as Map +import Data.Maybe (fromMaybe) import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import Text.Pandoc.BCP47 (Lang (..), parseBCP47) -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad, report, translateTerm, + setTranslations, toLang) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.Shared (linesToPara) import Text.Pandoc.Templates (renderTemplate') +import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Pandoc.XML @@ -67,32 +70,36 @@ plainToPara x = x type OD m = StateT WriterState m data WriterState = - WriterState { stNotes :: [Doc] - , stTableStyles :: [Doc] - , stParaStyles :: [Doc] - , stListStyles :: [(Int, [Doc])] - , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc) - , stTextStyleAttr :: Set.Set TextStyle - , stIndentPara :: Int - , stInDefinition :: Bool - , stTight :: Bool - , stFirstPara :: Bool - , stImageId :: Int + WriterState { stNotes :: [Doc] + , stTableStyles :: [Doc] + , stParaStyles :: [Doc] + , stListStyles :: [(Int, [Doc])] + , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc) + , stTextStyleAttr :: Set.Set TextStyle + , stIndentPara :: Int + , stInDefinition :: Bool + , stTight :: Bool + , stFirstPara :: Bool + , stImageId :: Int + , stTableCaptionId :: Int + , stImageCaptionId :: Int } defaultWriterState :: WriterState defaultWriterState = - WriterState { stNotes = [] - , stTableStyles = [] - , stParaStyles = [] - , stListStyles = [] - , stTextStyles = Map.empty - , stTextStyleAttr = Set.empty - , stIndentPara = 0 - , stInDefinition = False - , stTight = False - , stFirstPara = False - , stImageId = 1 + WriterState { stNotes = [] + , stTableStyles = [] + , stParaStyles = [] + , stListStyles = [] + , stTextStyles = Map.empty + , stTextStyleAttr = Set.empty + , stIndentPara = 0 + , stInDefinition = False + , stTight = False + , stFirstPara = False + , stImageId = 1 + , stTableCaptionId = 1 + , stImageCaptionId = 1 } when :: Bool -> Doc -> Doc @@ -193,10 +200,15 @@ formulaStyle mt = inTags False "style:style" ,("style:horizontal-rel", "paragraph-content") ,("style:wrap", "none")] -inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc -inHeaderTags i d = +inHeaderTags :: PandocMonad m => Int -> String -> Doc -> OD m Doc +inHeaderTags i ident d = return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] d + , ("text:outline-level", show i)] + $ if null ident + then d + else selfClosingTag "text:bookmark-start" [ ("text:name", ident) ] + <> d <> + selfClosingTag "text:bookmark-end" [ ("text:name", ident) ] inQuotes :: QuoteType -> Doc -> Doc inQuotes SingleQuote s = char '\8216' <> s <> char '\8217' @@ -218,6 +230,11 @@ handleSpaces s -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do + let defLang = Lang "en" "US" "" [] + lang <- case lookupMetaString "lang" meta of + "" -> pure defLang + s -> fromMaybe defLang <$> toLang (Just s) + setTranslations lang let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing @@ -349,8 +366,9 @@ blockToOpenDocument o bs | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b | Div attr xs <- bs = withLangFromAttr attr (blocksToOpenDocument o xs) - | Header i _ b <- bs = setFirstPara >> - (inHeaderTags i =<< inlinesToOpenDocument o b) + | Header i (ident,_,_) b + <- bs = setFirstPara >> (inHeaderTags i ident + =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b | DefinitionList b <- bs = setFirstPara >> defList b | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b @@ -394,11 +412,11 @@ blockToOpenDocument o bs mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles captionDoc <- if null c then return empty - else withParagraphStyle o "Table" [Para c] + else inlinesToOpenDocument o c >>= numberedTableCaption th <- if all null h then return empty - else colHeadsToOpenDocument o name (map fst paraHStyles) h - tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r + else colHeadsToOpenDocument o (map fst paraHStyles) h + tr <- mapM (tableRowToOpenDocument o (map fst paraStyles)) r return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) $$ captionDoc @@ -406,28 +424,54 @@ blockToOpenDocument o bs withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]] | otherwise = do imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]] - captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] + captionDoc <- inlinesToOpenDocument o caption >>= numberedFigureCaption return $ imageDoc $$ captionDoc + +numberedTableCaption :: PandocMonad m => Doc -> OD m Doc +numberedTableCaption caption = do + id' <- gets stTableCaptionId + modify (\st -> st{ stTableCaptionId = id' + 1 }) + capterm <- translateTerm Term.Table + return $ numberedCaption "Table" capterm "Table" id' caption + +numberedFigureCaption :: PandocMonad m => Doc -> OD m Doc +numberedFigureCaption caption = do + id' <- gets stImageCaptionId + modify (\st -> st{ stImageCaptionId = id' + 1 }) + capterm <- translateTerm Term.Figure + return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption + +numberedCaption :: String -> String -> String -> Int -> Doc -> Doc +numberedCaption style term name num caption = + let t = text term + r = num - 1 + s = inTags False "text:sequence" [ ("text:ref-name", "ref" ++ name ++ show r), + ("text:name", name), + ("text:formula", "ooow:" ++ name ++ "+1"), + ("style:num-format", "1") ] $ text $ show num + c = text ": " + in inParagraphTagsWithStyle style $ hcat [ t, text " ", s, c, caption ] + colHeadsToOpenDocument :: PandocMonad m - => WriterOptions -> String -> [String] -> [[Block]] + => WriterOptions -> [String] -> [[Block]] -> OD m Doc -colHeadsToOpenDocument o tn ns hs = +colHeadsToOpenDocument o ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns hs) + mapM (tableItemToOpenDocument o "TableHeaderRowCell") (zip ns hs) tableRowToOpenDocument :: PandocMonad m - => WriterOptions -> String -> [String] -> [[Block]] + => WriterOptions -> [String] -> [[Block]] -> OD m Doc -tableRowToOpenDocument o tn ns cs = +tableRowToOpenDocument o ns cs = inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns cs) + mapM (tableItemToOpenDocument o "TableRowCell") (zip ns cs) tableItemToOpenDocument :: PandocMonad m => WriterOptions -> String -> (String,[Block]) -> OD m Doc -tableItemToOpenDocument o tn (n,i) = - let a = [ ("table:style-name" , tn ++ ".A1" ) +tableItemToOpenDocument o s (n,i) = + let a = [ ("table:style-name" , s ) , ("office:value-type", "string" ) ] in inTags True "table:table-cell" a <$> @@ -500,7 +544,9 @@ inlineToOpenDocument o ils modify (\st -> st{ stImageId = id' + 1 }) let getDims [] = [] getDims (("width", w) :xs) = ("svg:width", w) : getDims xs + getDims (("rel-width", w):xs) = ("style:rel-width", w) : getDims xs getDims (("height", h):xs) = ("svg:height", h) : getDims xs + getDims (("rel-height", w):xs) = ("style:rel-height", w) : getDims xs getDims (_:xs) = getDims xs return $ inTags False "draw:frame" (("draw:name", "img" ++ show id') : getDims kvs) $ @@ -555,10 +601,18 @@ orderedListLevelStyle (s,n, d) (l,ls) = listLevelStyle :: Int -> Doc listLevelStyle i = - let indent = show (0.4 * fromIntegral (i - 1) :: Double) in - selfClosingTag "style:list-level-properties" - [ ("text:space-before" , indent ++ "in") - , ("text:min-label-width", "0.4in")] + let indent = show (0.5 * fromIntegral i :: Double) in + inTags True "style:list-level-properties" + [ ("text:list-level-position-and-space-mode", + "label-alignment") + , ("fo:text-align", "right") + ] $ + selfClosingTag "style:list-level-label-alignment" + [ ("text:label-followed-by", "listtab") + , ("text:list-tab-stop-position", indent ++ "in") + , ("fo:text-indent", "-0.1in") + , ("fo:margin-left", indent ++ "in") + ] tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = @@ -576,13 +630,21 @@ tableStyle num wcs = , ("style:family", "table-column" )] $ selfClosingTag "style:table-column-properties" [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))] - cellStyle = inTags True "style:style" - [ ("style:name" , tableId ++ ".A1") + headerRowCellStyle = inTags True "style:style" + [ ("style:name" , "TableHeaderRowCell") + , ("style:family", "table-cell" )] $ + selfClosingTag "style:table-cell-properties" + [ ("fo:border", "none")] + rowCellStyle = inTags True "style:style" + [ ("style:name" , "TableRowCell") , ("style:family", "table-cell" )] $ selfClosingTag "style:table-cell-properties" [ ("fo:border", "none")] + cellStyles = if num == 0 + then headerRowCellStyle $$ rowCellStyle + else empty columnStyles = map colStyle wcs - in table $$ vcat columnStyles $$ cellStyle + in cellStyles $$ table $$ vcat columnStyles paraStyle :: PandocMonad m => [(String,String)] -> OD m Int paraStyle attrs = do diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index a71775e13..12a54fd71 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -109,7 +109,7 @@ escapeString = escapeStringUsing $ , ('\x2013',"--") , ('\x2019',"'") , ('\x2026',"...") - ] ++ backslashEscapes "^_" + ] isRawFormat :: Format -> Bool isRawFormat f = @@ -266,7 +266,7 @@ orderedListItemToOrg marker items = do contents <- blockListToOrg items return $ hang (length marker + 1) (text marker <> space) (contents <> cr) --- | Convert defintion list item (label, list of blocks) to Org. +-- | Convert definition list item (label, list of blocks) to Org. definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m Doc definitionListItemToOrg (label, defs) = do diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index e14476b16..c97d8d770 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -72,7 +72,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Walk import Data.Time (UTCTime) import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" -import Text.Pandoc.Writers.Shared (metaValueToInlines) +import Text.Pandoc.Writers.Shared (lookupMetaInlines) import qualified Data.Map as M import qualified Data.Set as S import Data.Maybe (maybeToList, fromMaybe) @@ -731,9 +731,9 @@ makeEndNotesSlideBlocks = do anchorSet <- M.keysSet <$> gets stAnchorMap if M.null noteIds then return [] - else let title = case lookupMeta "notes-title" meta of - Just val -> metaValueToInlines val - Nothing -> [Str "Notes"] + else let title = case lookupMetaInlines "notes-title" meta of + [] -> [Str "Notes"] + ls -> ls ident = Shared.uniqueIdent title anchorSet hdr = Header slideLevel (ident, [], []) title blks = concatMap (\(n, bs) -> makeNoteEntry n bs) $ @@ -744,13 +744,7 @@ getMetaSlide :: Pres (Maybe Slide) getMetaSlide = do meta <- asks envMetadata title <- inlinesToParElems $ docTitle meta - subtitle <- inlinesToParElems $ - case lookupMeta "subtitle" meta of - Just (MetaString s) -> [Str s] - Just (MetaInlines ils) -> ils - Just (MetaBlocks [Plain ils]) -> ils - Just (MetaBlocks [Para ils]) -> ils - _ -> [] + subtitle <- inlinesToParElems $ lookupMetaInlines "subtitle" meta authors <- mapM inlinesToParElems $ docAuthors meta date <- inlinesToParElems $ docDate meta if null title && null subtitle && null authors && null date @@ -785,9 +779,9 @@ makeTOCSlide blks = local (\env -> env{envCurSlideId = tocSlideId}) $ do contents <- BulletList <$> mapM elementToListItem (Shared.hierarchicalize blks) meta <- asks envMetadata slideLevel <- asks envSlideLevel - let tocTitle = case lookupMeta "toc-title" meta of - Just val -> metaValueToInlines val - Nothing -> [Str "Table of Contents"] + let tocTitle = case lookupMetaInlines "toc-title" meta of + [] -> [Str "Table of Contents"] + ls -> ls hdr = Header slideLevel nullAttr tocTitle blocksToSlide [hdr, contents] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index f82597c55..d64529c21 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -35,7 +35,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Prelude import Control.Monad.State.Strict import Data.Char (isSpace, toLower) -import Data.List (isPrefixOf, stripPrefix) +import Data.List (isPrefixOf, stripPrefix, transpose) import Data.Maybe (fromMaybe) import Data.Text (Text, stripEnd) import qualified Text.Pandoc.Builder as B @@ -82,14 +82,12 @@ pandocToRST (Pandoc meta blocks) = do else Nothing let render' :: Doc -> Text render' = render colwidth - let subtit = case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> xs - _ -> [] + let subtit = lookupMetaInlines "subtitle" meta title <- titleToRST (docTitle meta) subtit metadata <- metaToJSON opts (fmap render' . blockListToRST) (fmap (stripEnd . render') . inlineListToRST) - $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta + meta body <- blockListToRST' True $ case writerTemplate opts of Just _ -> normalizeHeadings 1 blocks Nothing -> blocks @@ -103,8 +101,9 @@ pandocToRST (Pandoc meta blocks) = do let context = defField "body" main $ defField "toc" (writerTableOfContents opts) $ defField "toc-depth" (show $ writerTOCDepth opts) + $ defField "number-sections" (writerNumberSections opts) $ defField "math" hasMath - $ defField "title" (render Nothing title :: String) + $ defField "titleblock" (render Nothing title :: String) $ defField "math" hasMath $ defField "rawtex" rawTeX metadata case writerTemplate opts of @@ -209,11 +208,26 @@ blockToRST :: PandocMonad m => Block -- ^ Block element -> RST m Doc blockToRST Null = return empty -blockToRST (Div attr bs) = do +blockToRST (Div ("",["admonition-title"],[]) _) = return empty + -- this is generated by the rst reader and can safely be + -- omitted when we're generating rst +blockToRST (Div (ident,classes,_kvs) bs) = do contents <- blockListToRST bs - let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr) - let endTag = ".. raw:: html" $+$ nest 3 "</div>" - return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline + let admonitions = ["attention","caution","danger","error","hint", + "important","note","tip","warning","admonition"] + let admonition = case classes of + (cl:_) + | cl `elem` admonitions + -> ".. " <> text cl <> "::" + cls -> ".. container::" <> space <> + text (unwords (filter (/= "container") cls)) + return $ blankline $$ + admonition $$ + (if null ident + then blankline + else " :name: " <> text ident $$ blankline) $$ + nest 3 contents $$ + blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do @@ -236,6 +250,7 @@ blockToRST (LineBlock lns) = linesToLineBlock lns blockToRST (RawBlock f@(Format f') str) | f == "rst" = return $ text str + | f == "tex" = blockToRST (RawBlock (Format "latex") str) | otherwise = return $ blankline <> ".. raw:: " <> text (map toLower f') $+$ nest 3 (text str) $$ blankline @@ -272,7 +287,8 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do then return $ prefixed "> " (text str) $$ blankline else return $ (case [c | c <- classes, - c `notElem` ["sourceCode","literate","numberLines"]] of + c `notElem` ["sourceCode","literate","numberLines", + "number-lines","example"]] of [] -> "::" (lang:_) -> (".. code:: " <> text lang) $$ numberlines) $+$ nest 3 (text str) $$ blankline @@ -288,9 +304,12 @@ blockToRST (Table caption aligns widths headers rows) = do modify $ \st -> st{ stOptions = oldOpts } return result opts <- gets stOptions - tbl <- gridTable opts blocksToDoc (all null headers) - (map (const AlignDefault) aligns) widths - headers rows + let isSimple = all (== 0) widths + tbl <- if isSimple + then simpleTable opts blocksToDoc headers rows + else gridTable opts blocksToDoc (all null headers) + (map (const AlignDefault) aligns) widths + headers rows return $ if null caption then tbl $$ blankline else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$ @@ -331,7 +350,7 @@ orderedListItemToRST marker items = do let marker' = marker ++ " " return $ hang (length marker') (text marker') $ contents <> cr --- | Convert defintion list item (label, list of blocks) to RST. +-- | Convert definition list item (label, list of blocks) to RST. definitionListItemToRST :: PandocMonad m => ([Inline], [[Block]]) -> RST m Doc definitionListItemToRST (label, defs) = do label' <- inlineListToRST label @@ -470,6 +489,8 @@ flatten outer -- them and they will be readable and parsable (Quoted _ _, _) -> keep f i (_, Quoted _ _) -> keep f i + -- inlineToRST handles this case properly so it's safe to keep + (Link _ _ _, Image _ _ _) -> keep f i -- parent inlines would prevent links from being correctly -- parsed, in this case we prioritise the content over the -- style @@ -569,15 +590,18 @@ inlineToRST (Quoted DoubleQuote lst) = do else return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = writeInlines lst +inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do + return $ ":" <> text role <> ":`" <> text str <> "`" inlineToRST (Code _ str) = do opts <- gets stOptions -- we trim the string because the delimiters must adjoin a -- non-space character; see #3496 -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 - return $ if '`' `elem` str - then ":literal:`" <> text (escapeString opts (trim str)) <> "`" - else "``" <> text (trim str) <> "``" + return $ + if '`' `elem` str + then ":literal:`" <> text (escapeString opts (trim str)) <> "`" + else "``" <> text (trim str) <> "``" inlineToRST (Str str) = do opts <- gets stOptions return $ text $ @@ -672,3 +696,30 @@ imageDimsToRST attr = do Just dim -> cols dim Nothing -> empty return $ cr <> name $$ showDim Width $$ showDim Height + +simpleTable :: PandocMonad m + => WriterOptions + -> (WriterOptions -> [Block] -> m Doc) + -> [[Block]] + -> [[[Block]]] + -> m Doc +simpleTable opts blocksToDoc headers rows = do + -- can't have empty cells in first column: + let fixEmpties (d:ds) = if isEmpty d + then text "\\ " : ds + else d : ds + fixEmpties [] = [] + headerDocs <- if all null headers + then return [] + else fixEmpties <$> mapM (blocksToDoc opts) headers + rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows + let numChars [] = 0 + numChars xs = maximum . map offset $ xs + let colWidths = map numChars $ transpose (headerDocs : rowDocs) + let toRow = hsep . zipWith lblock colWidths + let hline = hsep (map (\n -> text (replicate n '=')) colWidths) + let hdr = if all null headers + then mempty + else hline $$ toRow headerDocs + let bdy = vcat $ map toRow rowDocs + return $ hdr $$ hline $$ bdy $$ hline diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 3045c1c10..ed8dc9ae4 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -341,8 +341,10 @@ listItemToRTF :: PandocMonad m listItemToRTF alignment indent marker [] = return $ rtfCompact (indent + listIncrement) (negate listIncrement) alignment (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ") -listItemToRTF alignment indent marker list = do - (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list +listItemToRTF alignment indent marker (listFirst:listRest) = do + let f = blockToRTF (indent + listIncrement) alignment + first <- f listFirst + rest <- mapM f listRest let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++ "\\tx" ++ show listIncrement ++ "\\tab" let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 2edce7deb..ed2c46d7b 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -38,17 +38,27 @@ module Text.Pandoc.Writers.Shared ( , resetField , defField , tagWithAttrs + , isDisplayMath , fixDisplayMath , unsmartify + , hasSimpleCells , gridTable - , metaValueToInlines + , lookupMetaBool + , lookupMetaBlocks + , lookupMetaInlines + , lookupMetaString , stripLeadingTrailingSpace + , groffEscape + , toSubscript + , toSuperscript ) where import Prelude import Control.Monad (zipWithM) +import Data.Monoid (Any (..)) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) +import Data.Char (chr, ord, isAscii, isSpace) import qualified Data.HashMap.Strict as H import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M @@ -59,9 +69,11 @@ import qualified Text.Pandoc.Builder as Builder import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Walk (query) +import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) +import Text.Pandoc.Walk (query) +import Text.Printf (printf) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -187,8 +199,9 @@ tagWithAttrs tag (ident,classes,kvs) = hsep ] <> ">" isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False +isDisplayMath (Math DisplayMath _) = True +isDisplayMath (Span _ [Math DisplayMath _]) = True +isDisplayMath _ = False stripLeadingTrailingSpace :: [Inline] -> [Inline] stripLeadingTrailingSpace = go . reverse . go . reverse @@ -233,6 +246,21 @@ unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs unsmartify opts (x:xs) = x : unsmartify opts xs unsmartify _ [] = [] +-- | True if block is a table that can be represented with +-- one line per row. +hasSimpleCells :: Block -> Bool +hasSimpleCells (Table _caption _aligns _widths headers rows) = + all isSimpleCell (concat (headers:rows)) + where + isLineBreak LineBreak = Any True + isLineBreak _ = Any False + hasLineBreak = getAny . query isLineBreak + isSimpleCell [Plain ils] = not (hasLineBreak ils) + isSimpleCell [Para ils ] = not (hasLineBreak ils) + isSimpleCell [] = True + isSimpleCell _ = False +hasSimpleCells _ = False + gridTable :: Monad m => WriterOptions -> (WriterOptions -> [Block] -> m Doc) @@ -332,9 +360,82 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do body $$ border '-' (repeat AlignDefault) widthsInChars -metaValueToInlines :: MetaValue -> [Inline] -metaValueToInlines (MetaString s) = [Str s] -metaValueToInlines (MetaInlines ils) = ils -metaValueToInlines (MetaBlocks bs) = query return bs -metaValueToInlines (MetaBool b) = [Str $ show b] -metaValueToInlines _ = [] + + +-- | Retrieve the metadata value for a given @key@ +-- and convert to Bool. +lookupMetaBool :: String -> Meta -> Bool +lookupMetaBool key meta = + case lookupMeta key meta of + Just (MetaBlocks _) -> True + Just (MetaInlines _) -> True + Just (MetaString (_:_)) -> True + Just (MetaBool True) -> True + _ -> False + +-- | Retrieve the metadata value for a given @key@ +-- and extract blocks. +lookupMetaBlocks :: String -> Meta -> [Block] +lookupMetaBlocks key meta = + case lookupMeta key meta of + Just (MetaBlocks bs) -> bs + Just (MetaInlines ils) -> [Plain ils] + Just (MetaString s) -> [Plain [Str s]] + _ -> [] + +-- | Retrieve the metadata value for a given @key@ +-- and extract inlines. +lookupMetaInlines :: String -> Meta -> [Inline] +lookupMetaInlines key meta = + case lookupMeta key meta of + Just (MetaString s) -> [Str s] + Just (MetaInlines ils) -> ils + Just (MetaBlocks [Plain ils]) -> ils + Just (MetaBlocks [Para ils]) -> ils + _ -> [] + +-- | Retrieve the metadata value for a given @key@ +-- and convert to String. +lookupMetaString :: String -> Meta -> String +lookupMetaString key meta = + case lookupMeta key meta of + Just (MetaString s) -> s + Just (MetaInlines ils) -> stringify ils + Just (MetaBlocks bs) -> stringify bs + Just (MetaBool b) -> show b + _ -> "" + +-- | Escape non-ASCII characters using groff \u[..] sequences. +groffEscape :: T.Text -> T.Text +groffEscape = T.concatMap toUchar + where toUchar c + | isAscii c = T.singleton c + | otherwise = T.pack $ printf "\\[u%04X]" (ord c) + + +toSuperscript :: Char -> Maybe Char +toSuperscript '1' = Just '\x00B9' +toSuperscript '2' = Just '\x00B2' +toSuperscript '3' = Just '\x00B3' +toSuperscript '+' = Just '\x207A' +toSuperscript '-' = Just '\x207B' +toSuperscript '=' = Just '\x207C' +toSuperscript '(' = Just '\x207D' +toSuperscript ')' = Just '\x207E' +toSuperscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2070 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +toSubscript :: Char -> Maybe Char +toSubscript '+' = Just '\x208A' +toSubscript '-' = Just '\x208B' +toSubscript '=' = Just '\x208C' +toSubscript '(' = Just '\x208D' +toSubscript ')' = Just '\x208E' +toSubscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2080 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index e461f5715..9169c8515 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -35,7 +35,6 @@ import Prelude import Data.Char (toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Text (Text) -import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -48,16 +47,6 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared import Text.Pandoc.XML --- | Convert list of authors to a docbook <author> section -authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines -authorToTEI opts name' = do - name <- render Nothing <$> inlinesToTEI opts name' - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - return $ B.rawInline "tei" $ render colwidth $ - inTagsSimple "author" (text $ escapeStringForXML name) - -- | Convert Pandoc document to string in Docbook format. writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTEI opts (Pandoc meta blocks) = do @@ -72,13 +61,11 @@ writeTEI opts (Pandoc meta blocks) = do TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' <- mapM (authorToTEI opts) $ docAuthors meta - let meta' = B.setMeta "author" auths' meta metadata <- metaToJSON opts (fmap (render' . vcat) . mapM (elementToTEI opts startLvl) . hierarchicalize) (fmap render' . inlinesToTEI opts) - meta' + meta main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements let context = defField "body" main $ diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 305b41206..21d1f4eca 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -56,8 +56,6 @@ import Text.Printf (printf) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout - , stSuperscript :: Bool -- document contains superscript - , stSubscript :: Bool -- document contains subscript , stEscapeComma :: Bool -- in a context where we need @comma , stIdentifiers :: Set.Set String -- header ids used already , stOptions :: WriterOptions -- writer options @@ -74,8 +72,7 @@ type TI m = StateT WriterState m writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTexinfo options document = evalStateT (pandocToTexinfo options $ wrapTop document) - WriterState { stStrikeout = False, stSuperscript = False, - stEscapeComma = False, stSubscript = False, + WriterState { stStrikeout = False, stEscapeComma = False, stIdentifiers = Set.empty, stOptions = options} -- | Add a "Top" node around the document, needed by Texinfo. @@ -102,8 +99,6 @@ pandocToTexinfo options (Pandoc meta blocks) = do let context = defField "body" body $ defField "toc" (writerTableOfContents options) $ defField "titlepage" titlePage - $ defField "subscript" (stSubscript st) - $ defField "superscript" (stSuperscript st) $ defField "strikeout" (stStrikeout st) metadata case writerTemplate options of @@ -351,12 +346,9 @@ collectNodes :: Int -> [Block] -> [Block] collectNodes _ [] = [] collectNodes level (x:xs) = case x of - (Header hl _ _) -> - if hl < level - then [] - else if hl == level - then x : collectNodes level xs - else collectNodes level xs + (Header hl _ _) | hl < level -> [] + | hl == level -> x : collectNodes level xs + | otherwise -> collectNodes level xs _ -> collectNodes level xs @@ -394,7 +386,7 @@ defListItemToTexinfo (term, defs) = do inlineListToTexinfo :: PandocMonad m => [Inline] -- ^ Inlines to convert -> TI m Doc -inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat +inlineListToTexinfo lst = hcat <$> mapM inlineToTexinfo lst -- | Convert list of inline elements to Texinfo acceptable for a node name. inlineListForNode :: PandocMonad m @@ -416,10 +408,10 @@ inlineToTexinfo (Span _ lst) = inlineListToTexinfo lst inlineToTexinfo (Emph lst) = - inlineListToTexinfo lst >>= return . inCmd "emph" + inCmd "emph" <$> inlineListToTexinfo lst inlineToTexinfo (Strong lst) = - inlineListToTexinfo lst >>= return . inCmd "strong" + inCmd "strong" <$> inlineListToTexinfo lst inlineToTexinfo (Strikeout lst) = do modify $ \st -> st{ stStrikeout = True } @@ -427,17 +419,15 @@ inlineToTexinfo (Strikeout lst) = do return $ text "@textstrikeout{" <> contents <> text "}" inlineToTexinfo (Superscript lst) = do - modify $ \st -> st{ stSuperscript = True } contents <- inlineListToTexinfo lst - return $ text "@textsuperscript{" <> contents <> char '}' + return $ text "@sup{" <> contents <> char '}' inlineToTexinfo (Subscript lst) = do - modify $ \st -> st{ stSubscript = True } contents <- inlineListToTexinfo lst - return $ text "@textsubscript{" <> contents <> char '}' + return $ text "@sub{" <> contents <> char '}' inlineToTexinfo (SmallCaps lst) = - inlineListToTexinfo lst >>= return . inCmd "sc" + inCmd "sc" <$> inlineListToTexinfo lst inlineToTexinfo (Code _ str) = return $ text $ "@code{" ++ stringToTexinfo str ++ "}" diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 0ed79d2df..c7d96454a 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -73,7 +73,7 @@ pandocToTextile opts (Pandoc meta blocks) = do (inlineListToTextile opts) meta body <- blockListToTextile opts blocks notes <- gets $ unlines . reverse . stNotes - let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes) + let main = pack $ body ++ if null notes then "" else "\n\n" ++ notes let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main @@ -154,7 +154,7 @@ blockToTextile _ HorizontalRule = return "<hr />\n" blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do contents <- inlineListToTextile opts inlines - let identAttr = if null ident then "" else ('#':ident) + let identAttr = if null ident then "" else '#':ident let attribs = if null identAttr && null classes then "" else "(" ++ unwords classes ++ identAttr ++ ")" @@ -382,13 +382,13 @@ blockListToTextile :: PandocMonad m -> [Block] -- ^ List of block elements -> TW m String blockListToTextile opts blocks = - mapM (blockToTextile opts) blocks >>= return . vcat + vcat <$> mapM (blockToTextile opts) blocks -- | Convert list of Pandoc inline elements to Textile. inlineListToTextile :: PandocMonad m => WriterOptions -> [Inline] -> TW m String inlineListToTextile opts lst = - mapM (inlineToTextile opts) lst >>= return . concat + concat <$> mapM (inlineToTextile opts) lst -- | Convert Pandoc inline element to Textile. inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String @@ -463,15 +463,15 @@ inlineToTextile _ SoftBreak = return " " inlineToTextile _ Space = return " " inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do - let classes = if null cls - then "" - else "(" ++ unwords cls ++ ")" label <- case txt of [Code _ s] | s == src -> return "$" [Str s] | s == src -> return "$" _ -> inlineListToTextile opts txt + let classes = if null cls || cls == ["uri"] && label == "$" + then "" + else "(" ++ unwords cls ++ ")" return $ "\"" ++ classes ++ label ++ "\":" ++ src inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do |