diff options
Diffstat (limited to 'src/Text/Pandoc')
69 files changed, 4164 insertions, 1801 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 19066e8b7..68bdc1432 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -76,14 +76,16 @@ import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, - setResourcePath, withMediaBag) + setResourcePath, withMediaBag, setTrace) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Lua (runLuaFilter) +import Text.Pandoc.Lua (runLuaFilter, LuaException(..)) +import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile, - readDataFileUTF8, safeRead, tabFilter) + readDataFileUTF8, safeRead, tabFilter, + eastAsianLineBreakFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (toEntities) import Text.Printf @@ -133,11 +135,11 @@ convertWithOpts opts = do Nothing -> return Nothing Just fp -> Just <$> UTF8.readFile fp - let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" let mathMethod = case (optKaTeXJS opts, optKaTeXStylesheet opts) of (Nothing, _) -> optHTMLMathMethod opts - (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) + (Just js, ss) -> KaTeX js (fromMaybe + (defaultKaTeXURL ++ "katex.min.css") ss) -- --bibliography implies -F pandoc-citeproc for backwards compatibility: @@ -181,11 +183,12 @@ convertWithOpts opts = do let msOutput = format == "ms" -- disabling the custom writer for now - writer <- if ".lua" `isSuffixOf` format + (writer, writerExts) <- + if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName then return (TextWriter (\o d -> liftIO $ writeCustom writerName o d) - :: Writer PandocIO) + :: Writer PandocIO, mempty) else case getWriter writerName of Left e -> E.throwIO $ PandocAppError $ if format == "pdf" @@ -195,12 +198,13 @@ convertWithOpts opts = do "\nand specify an output file with " ++ ".pdf extension (-o filename.pdf)." else e - Right w -> return (w :: Writer PandocIO) + Right (w, es) -> return (w :: Writer PandocIO, es) -- TODO: we have to get the input and the output into the state for -- the sake of the text2tags reader. - reader <- case getReader readerName of - Right r -> return (r :: Reader PandocIO) + (reader, readerExts) <- + case getReader readerName of + Right (r, es) -> return (r :: Reader PandocIO, es) Left e -> E.throwIO $ PandocAppError e' where e' = case readerName of "pdf" -> e ++ @@ -304,11 +308,11 @@ convertWithOpts opts = do , readerColumns = optColumns opts , readerTabStop = optTabStop opts , readerIndentedCodeClasses = optIndentedCodeClasses opts - , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = optDefaultImageExtension opts , readerTrackChanges = optTrackChanges opts , readerAbbreviations = abbrevs + , readerExtensions = readerExts } highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts @@ -339,6 +343,7 @@ convertWithOpts opts = do writerNumberSections = optNumberSections opts, writerNumberOffset = optNumberOffset opts, writerSectionDivs = optSectionDivs opts, + writerExtensions = writerExts, writerReferenceLinks = optReferenceLinks opts, writerReferenceLocation = optReferenceLocation opts, writerDpi = optDpi opts, @@ -354,6 +359,7 @@ convertWithOpts opts = do writerSlideLevel = optSlideLevel opts, writerHighlightStyle = highlightStyle, writerSetextHeaders = optSetextHeaders opts, + writerEpubSubdirectory = optEpubSubdirectory opts, writerEpubMetadata = epubMetadata, writerEpubFonts = optEpubFonts opts, writerEpubChapterLevel = optEpubChapterLevel opts, @@ -375,13 +381,21 @@ convertWithOpts opts = do "Specify an output file using the -o option." - let transforms = case optBaseHeaderLevel opts of - x | x > 1 -> [headerShift (x - 1)] - | otherwise -> [] + let transforms = (case optBaseHeaderLevel opts of + x | x > 1 -> (headerShift (x - 1) :) + | otherwise -> id) $ + (if extensionEnabled Ext_east_asian_line_breaks + readerExts && + not (extensionEnabled Ext_east_asian_line_breaks + writerExts && + writerWrapText writerOptions == WrapPreserve) + then (eastAsianLineBreakFilter :) + else id) + [] let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" - then 0 - else optTabStop opts) + then 0 + else optTabStop opts) readSources :: [FilePath] -> PandocIO Text readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> @@ -390,6 +404,7 @@ convertWithOpts opts = do let runIO' :: PandocIO a -> IO a runIO' f = do (res, reports) <- runIOorExplode $ do + setTrace (optTrace opts) setVerbosity verbosity x <- f rs <- getLog @@ -518,7 +533,8 @@ externalFilter f args' d = liftIO $ do (exitcode, outbs) <- E.handle filterException $ pipeProcess env' f' args'' $ encode d case exitcode of - ExitSuccess -> return $ either error id $ eitherDecode' outbs + ExitSuccess -> either (E.throwIO . PandocFilterError f) + return $ eitherDecode' outbs ExitFailure ec -> E.throwIO $ PandocFilterError f ("Filter returned error status " ++ show ec) where filterException :: E.SomeException -> IO a @@ -550,6 +566,7 @@ data Opt = Opt , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc + , optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters @@ -558,6 +575,7 @@ data Opt = Opt , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output + , optTrace :: Bool -- ^ Enable tracing , optLogFile :: Maybe FilePath -- ^ File to write JSON log output , optFailIfWarnings :: Bool -- ^ Fail on warnings , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst @@ -624,6 +642,7 @@ defaultOpts = Opt , optHTMLMathMethod = PlainMath , optAbbreviations = Nothing , optReferenceDoc = Nothing + , optEpubSubdirectory = "EPUB" , optEpubMetadata = Nothing , optEpubFonts = [] , optEpubChapterLevel = 1 @@ -632,6 +651,7 @@ defaultOpts = Opt , optDumpArgs = False , optIgnoreArgs = False , optVerbosity = WARNING + , optTrace = False , optLogFile = Nothing , optFailIfWarnings = False , optReferenceLinks = False @@ -778,10 +798,16 @@ expandFilterPath mbDatadir fp = liftIO $ do _ -> return fp applyLuaFilters :: MonadIO m - => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc + => Maybe FilePath -> [FilePath] -> [String] -> Pandoc + -> m Pandoc applyLuaFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters - foldrM ($) d $ map (flip runLuaFilter args) expandedFilters + let go f d' = liftIO $ do + res <- E.try (runLuaFilter mbDatadir f args d') + case res of + Right x -> return x + Left (LuaException s) -> E.throw (PandocFilterError f s) + foldrM ($) d $ map go expandedFilters applyFilters :: MonadIO m => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc @@ -968,7 +994,7 @@ options = templ <- getDefaultTemplate Nothing arg case templ of Right t -> UTF8.hPutStr stdout t - Left e -> error $ show e + Left e -> E.throwIO $ PandocAppError (show e) exitSuccess) "FORMAT") "" -- "Print default template for FORMAT" @@ -1232,6 +1258,13 @@ options = "FILE") "" -- "Path of custom reference doc" + , Option "" ["epub-subdirectory"] + (ReqArg + (\arg opt -> + return opt { optEpubSubdirectory = arg }) + "DIRNAME") + "" -- "Name of subdirectory for epub content in OCF container" + , Option "" ["epub-cover-image"] (ReqArg (\arg opt -> @@ -1355,7 +1388,8 @@ options = , Option "" ["mathjax"] (OptArg (\arg opt -> do - let url' = fromMaybe "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML-full" arg + let url' = fromMaybe (defaultMathJaxURL ++ + "MathJax.js?config=TeX-AMS_CHTML-full") arg return opt { optHTMLMathMethod = MathJax url'}) "URL") "" -- "Use MathJax for HTML math" @@ -1364,7 +1398,7 @@ options = (\arg opt -> return opt { optKaTeXJS = - arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"}) + arg <|> Just (defaultKaTeXURL ++ "katex.min.js")}) "URL") "" -- Use KaTeX for HTML Math @@ -1388,7 +1422,7 @@ options = , Option "" ["trace"] (NoArg - (\opt -> return opt { optVerbosity = DEBUG })) + (\opt -> return opt { optTrace = True })) "" -- "Turn on diagnostic tracing in readers." , Option "" ["dump-args"] @@ -1530,6 +1564,8 @@ handleUnrecognizedOption :: String -> [String] -> [String] handleUnrecognizedOption "--smart" = (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++ "For example: pandoc -f markdown+smart -t markdown-smart.") :) +handleUnrecognizedOption "--normalize" = + ("--normalize has been removed. Normalization is now automatic." :) handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart" handleUnrecognizedOption "--old-dashes" = ("--old-dashes has been removed. Use +old_dashes extension instead." :) diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs new file mode 100644 index 000000000..b4b55c5d4 --- /dev/null +++ b/src/Text/Pandoc/BCP47.hs @@ -0,0 +1,139 @@ +{- +Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.BCP47 + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for parsing and rendering BCP47 language identifiers. +-} +module Text.Pandoc.BCP47 ( + getLang + , toLang + , parseBCP47 + , Lang(..) + , renderLang + ) +where +import Control.Monad (guard) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower, + isAlphaNum) +import Data.List (intercalate) +import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import qualified Text.Parsec as P + +-- | Represents BCP 47 language/country code. +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) + +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe String +getLang opts meta = + case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Parse a BCP 47 string as a Lang. Currently we parse +-- extensions and private-use fields as "variants," even +-- though officially they aren't. +parseBCP47 :: String -> Either String Lang +parseBCP47 lang = + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) + P.eof + return $ Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return $ map toLower cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return $ map toLower (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return $ map toUpper cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return $ map toLower var + pExtension = P.try $ do + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 2 && length cs <= 8 + return $ map toLower cs + pPrivateUse = P.try $ do + P.char '-' + P.char 'x' + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 1 && length cs <= 8 + let var = "x-" ++ cs + return $ map toLower var diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 14a0b8044..120ba8fee 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Class ( PandocMonad(..) , getZonedTime , readFileFromDirs , report + , setTrace , getLog , setVerbosity , getMediaBag @@ -78,7 +79,7 @@ import qualified Text.Pandoc.Shared as IO ( readDataFile import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging -import Text.Parsec (ParsecT) +import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Pandoc.Definition @@ -107,7 +108,7 @@ import qualified System.FilePath.Glob as IO (glob) import qualified System.Directory as IO (getModificationTime) import Control.Monad as M (fail) import Control.Monad.Reader (ReaderT) -import Control.Monad.State +import Control.Monad.State.Strict import Control.Monad.Except import Control.Monad.Writer (WriterT) import Control.Monad.RWS (RWST) @@ -117,6 +118,7 @@ import System.IO.Error import System.IO (stderr) import qualified Data.Map as M import Text.Pandoc.Error +import qualified Debug.Trace class (Functor m, Applicative m, Monad m, MonadError PandocError m) => PandocMonad m where @@ -140,6 +142,11 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) modifyCommonState :: (CommonState -> CommonState) -> m () modifyCommonState f = getCommonState >>= putCommonState . f + trace :: String -> m () + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) + logOutput :: LogMessage -> m () -- Functions defined for all PandocMonad instances @@ -155,10 +162,11 @@ report :: PandocMonad m => LogMessage -> m () report msg = do verbosity <- getsCommonState stVerbosity let level = messageVerbosity msg - when (level <= verbosity) $ - logOutput msg - unless (level == DEBUG) $ - modifyCommonState $ \st -> st{ stLog = msg : stLog st } + when (level <= verbosity) $ logOutput msg + modifyCommonState $ \st -> st{ stLog = msg : stLog st } + +setTrace :: PandocMonad m => Bool -> m () +setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} setMediaBag :: PandocMonad m => MediaBag -> m () setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} @@ -208,6 +216,7 @@ data CommonState = CommonState { stLog :: [LogMessage] , stOutputFile :: Maybe FilePath , stResourcePath :: [FilePath] , stVerbosity :: Verbosity + , stTrace :: Bool } instance Default CommonState where @@ -217,6 +226,7 @@ instance Default CommonState where , stOutputFile = Nothing , stResourcePath = ["."] , stVerbosity = WARNING + , stTrace = False } runIO :: PandocIO a -> IO (Either PandocError a) @@ -561,8 +571,20 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ do + pos <- getPosition + Debug.Trace.trace + ("[trace] Parsed " ++ msg ++ " at line " ++ + show (sourceLine pos) ++ + if sourceName pos == "chunk" + then " of chunk" + else "") + (return ()) logOutput = lift . logOutput + instance PandocMonad m => PandocMonad (ReaderT r m) where lookupEnv = lift . lookupEnv getCurrentTime = lift getCurrentTime diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs index 1de197801..b1cde82a4 100644 --- a/src/Text/Pandoc/Compat/Time.hs +++ b/src/Text/Pandoc/Compat/Time.hs @@ -27,4 +27,4 @@ where import Data.Time import System.Locale ( defaultTimeLocale ) -#endif
\ No newline at end of file +#endif diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 3cf381168..24186720c 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -61,7 +61,10 @@ data PandocError = PandocIOError String IOError | PandocFilterError String String | PandocCouldNotFindDataFileError String | PandocResourceNotFound String + | PandocTemplateError String | PandocAppError String + | PandocEpubSubdirectoryError String + | PandocMacroLoop String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -101,7 +104,12 @@ handleError (Left e) = "Could not find data file " ++ fn PandocResourceNotFound fn -> err 99 $ "File " ++ fn ++ " not found in resource path" + PandocTemplateError s -> err 5 s PandocAppError s -> err 1 s + PandocEpubSubdirectoryError s -> err 31 $ + "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + PandocMacroLoop s -> err 91 $ + "Loop encountered in expanding macro " ++ s err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 58e8c414d..28459d4e6 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -45,7 +45,7 @@ module Text.Pandoc.Extensions ( Extension(..) , githubMarkdownExtensions , multimarkdownExtensions ) where -import Data.Bits (clearBit, setBit, testBit) +import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -55,6 +55,10 @@ import Text.Parsec newtype Extensions = Extensions Integer deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) +instance Monoid Extensions where + mempty = Extensions 0 + mappend (Extensions a) (Extensions b) = Extensions (a .|. b) + extensionsFromList :: [Extension] -> Extensions extensionsFromList = foldr enableExtension emptyExtensions @@ -94,6 +98,7 @@ data Extension = | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags | Ext_native_spans -- ^ Use Span inlines for contents of <span> @@ -162,6 +167,7 @@ pandocExtensions = extensionsFromList , Ext_fenced_code_attributes , Ext_backtick_code_blocks , Ext_inline_code_attributes + , Ext_raw_attribute , Ext_markdown_in_html_blocks , Ext_native_divs , Ext_native_spans @@ -238,7 +244,6 @@ githubMarkdownExtensions = extensionsFromList , Ext_space_in_atx_header , Ext_intraword_underscores , Ext_strikeout - , Ext_hard_line_breaks , Ext_emoji , Ext_lists_without_preceding_blankline , Ext_shortcut_reference_links @@ -275,6 +280,8 @@ multimarkdownExtensions = extensionsFromList , Ext_subscript , Ext_backtick_code_blocks , Ext_spaced_reference_links + -- So far only in dev version of mmd: + , Ext_raw_attribute ] -- | Language extensions to be used with strict markdown. @@ -311,6 +318,7 @@ getDefaultExtensions "epub2" = getDefaultExtensions "epub" getDefaultExtensions "epub3" = getDefaultExtensions "epub" getDefaultExtensions "latex" = extensionsFromList [Ext_smart, + Ext_latex_macros, Ext_auto_identifiers] getDefaultExtensions "context" = extensionsFromList [Ext_smart, diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index da8c775f6..1dcff7470 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -52,7 +52,7 @@ import Text.Pandoc.Definition import Text.Parsec.Pos -- | Verbosity level. -data Verbosity = ERROR | WARNING | INFO | DEBUG +data Verbosity = ERROR | WARNING | INFO deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic) instance ToJSON Verbosity where @@ -63,7 +63,6 @@ instance FromJSON Verbosity where "ERROR" -> return ERROR "WARNING" -> return WARNING "INFO" -> return INFO - "DEBUG" -> return DEBUG _ -> mzero parseJSON _ = mzero @@ -78,7 +77,7 @@ data LogMessage = | CircularReference String SourcePos | ParsingUnescaped String SourcePos | CouldNotLoadIncludeFile String SourcePos - | ParsingTrace String SourcePos + | MacroAlreadyDefined String SourcePos | InlineNotRendered Inline | BlockNotRendered Block | DocxParserWarning String @@ -92,7 +91,9 @@ data LogMessage = | Extracting String | NoTitleElement String | NoLangSpecified + | InvalidLang String | CouldNotHighlight String + | MissingCharacter String deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -150,11 +151,11 @@ instance ToJSON LogMessage where "source" .= Text.pack (sourceName pos), "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] - ParsingTrace s pos -> - ["contents" .= Text.pack s, + MacroAlreadyDefined name pos -> + ["name" .= Text.pack name, "source" .= Text.pack (sourceName pos), - "line" .= sourceLine pos, - "column" .= sourceColumn pos] + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] InlineNotRendered il -> ["contents" .= toJSON il] BlockNotRendered bl -> @@ -184,8 +185,12 @@ instance ToJSON LogMessage where NoTitleElement fallback -> ["fallback" .= Text.pack fallback] NoLangSpecified -> [] + InvalidLang s -> + ["lang" .= Text.pack s] CouldNotHighlight msg -> ["message" .= Text.pack msg] + MissingCharacter msg -> + ["message" .= Text.pack msg] showPos :: SourcePos -> String showPos pos = sn ++ "line " ++ @@ -225,8 +230,8 @@ showLogMessage msg = "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos CouldNotLoadIncludeFile fp pos -> "Could not load include file '" ++ fp ++ "' at " ++ showPos pos - ParsingTrace s pos -> - "Parsing trace at " ++ showPos pos ++ ": " ++ s + MacroAlreadyDefined name pos -> + "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos InlineNotRendered il -> "Not rendering " ++ show il BlockNotRendered bl -> @@ -260,8 +265,13 @@ showLogMessage msg = NoLangSpecified -> "No value for 'lang' was specified in the metadata.\n" ++ "It is recommended that lang be specified for this format." + InvalidLang s -> + "Invalid 'lang' value '" ++ s ++ "'.\n" ++ + "Use an IETF language tag like 'en-US'." CouldNotHighlight m -> "Could not highlight code block:\n" ++ m + MissingCharacter m -> + "Missing character: " ++ m messageVerbosity:: LogMessage -> Verbosity messageVerbosity msg = @@ -275,8 +285,8 @@ messageVerbosity msg = ReferenceNotFound{} -> WARNING CircularReference{} -> WARNING CouldNotLoadIncludeFile{} -> WARNING + MacroAlreadyDefined{} -> WARNING ParsingUnescaped{} -> INFO - ParsingTrace{} -> DEBUG InlineNotRendered{} -> INFO BlockNotRendered{} -> INFO DocxParserWarning{} -> WARNING @@ -290,4 +300,6 @@ messageVerbosity msg = Extracting{} -> INFO NoTitleElement{} -> WARNING NoLangSpecified -> INFO + InvalidLang{} -> WARNING CouldNotHighlight{} -> WARNING + MissingCharacter{} -> WARNING diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index f74c0e425..22b68d5e0 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -1,3 +1,8 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright © 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -15,9 +20,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017 Albert Krewinkel @@ -28,11 +30,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Pandoc lua utils. -} -module Text.Pandoc.Lua ( runLuaFilter, pushPandocModule ) where +module Text.Pandoc.Lua ( LuaException(..), + runLuaFilter, + pushPandocModule ) where -import Control.Monad (unless, when, (>=>)) +import Control.Exception +import Control.Monad (unless, when, (>=>), mplus) import Control.Monad.Trans (MonadIO (..)) +import Data.Data (toConstr, showConstr, dataTypeOf, dataTypeConstrs, Data) import Data.Map (Map) +import Data.Maybe (isJust) +import Data.Typeable (Typeable) import Scripting.Lua (LuaState, StackValue (..)) import Text.Pandoc.Definition import Text.Pandoc.Lua.PandocModule (pushPandocModule) @@ -42,24 +50,25 @@ import Text.Pandoc.Walk import qualified Data.Map as Map import qualified Scripting.Lua as Lua +newtype LuaException = LuaException String + deriving (Show, Typeable) + +instance Exception LuaException + runLuaFilter :: (MonadIO m) - => FilePath -> [String] -> Pandoc -> m Pandoc -runLuaFilter filterPath args pd = liftIO $ do + => Maybe FilePath -> FilePath -> [String] -> Pandoc -> m Pandoc +runLuaFilter datadir filterPath args pd = liftIO $ do lua <- Lua.newstate Lua.openlibs lua - -- create table in registry to store filter functions - Lua.push lua "PANDOC_FILTER_FUNCTIONS" - Lua.newtable lua - Lua.rawset lua Lua.registryindex -- store module in global "pandoc" - pushPandocModule lua + pushPandocModule datadir lua Lua.setglobal lua "pandoc" top <- Lua.gettop lua status <- Lua.loadfile lua filterPath - if (status /= 0) + if status /= 0 then do Just luaErrMsg <- Lua.peek lua 1 - error luaErrMsg + throwIO (LuaException luaErrMsg) else do Lua.call lua 0 Lua.multret newtop <- Lua.gettop lua @@ -80,157 +89,91 @@ pushGlobalFilter lua = *> Lua.rawseti lua (-2) 1 runAll :: [LuaFilter] -> Pandoc -> IO Pandoc -runAll [] = return -runAll (x:xs) = walkMWithLuaFilter x >=> runAll xs +runAll = foldr ((>=>) . walkMWithLuaFilter) return walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc walkMWithLuaFilter (LuaFilter lua fnMap) = - walkM (execInlineLuaFilter lua fnMap) >=> - walkM (execBlockLuaFilter lua fnMap) >=> - walkM (execMetaLuaFilter lua fnMap) >=> - walkM (execDocLuaFilter lua fnMap) + (if hasOneOf (constructorsFor (dataTypeOf (Str []))) + then walkM (tryFilter lua fnMap :: Inline -> IO Inline) + else return) + >=> + (if hasOneOf (constructorsFor (dataTypeOf (Para []))) + then walkM (tryFilter lua fnMap :: Block -> IO Block) + else return) + >=> + (case Map.lookup "Meta" fnMap of + Just fn -> walkM (\(Pandoc meta blocks) -> do + meta' <- runFilterFunction lua fn meta + return $ Pandoc meta' blocks) + Nothing -> return) + >=> + (case Map.lookup "Pandoc" fnMap `mplus` Map.lookup "Doc" fnMap of + Just fn -> (runFilterFunction lua fn) :: Pandoc -> IO Pandoc + Nothing -> return) + where hasOneOf = any (\k -> isJust (Map.lookup k fnMap)) + constructorsFor x = map show (dataTypeConstrs x) type FunctionMap = Map String LuaFilterFunction data LuaFilter = LuaFilter LuaState FunctionMap newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } -execDocLuaFilter :: LuaState - -> FunctionMap - -> Pandoc -> IO Pandoc -execDocLuaFilter lua fnMap x = do - let docFnName = "Doc" - case Map.lookup docFnName fnMap of +tryFilter :: (Data a, StackValue a) => LuaState -> FunctionMap -> a -> IO a +tryFilter lua fnMap x = + let filterFnName = showConstr (toConstr x) in + case Map.lookup filterFnName fnMap of Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x - -execMetaLuaFilter :: LuaState - -> FunctionMap - -> Pandoc -> IO Pandoc -execMetaLuaFilter lua fnMap pd@(Pandoc meta blks) = do - let metaFnName = "Meta" - case Map.lookup metaFnName fnMap of - Nothing -> return pd - Just fn -> do - meta' <- runLuaFilterFunction lua fn meta - return $ Pandoc meta' blks - -execBlockLuaFilter :: LuaState - -> FunctionMap - -> Block -> IO Block -execBlockLuaFilter lua fnMap x = do - let tryFilter :: String -> IO Block - tryFilter filterFnName = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x - case x of - BlockQuote _ -> tryFilter "BlockQuote" - BulletList _ -> tryFilter "BulletList" - CodeBlock _ _ -> tryFilter "CodeBlock" - DefinitionList _ -> tryFilter "DefinitionList" - Div _ _ -> tryFilter "Div" - Header _ _ _ -> tryFilter "Header" - HorizontalRule -> tryFilter "HorizontalRule" - LineBlock _ -> tryFilter "LineBlock" - Null -> tryFilter "Null" - Para _ -> tryFilter "Para" - Plain _ -> tryFilter "Plain" - RawBlock _ _ -> tryFilter "RawBlock" - OrderedList _ _ -> tryFilter "OrderedList" - Table _ _ _ _ _ -> tryFilter "Table" - -execInlineLuaFilter :: LuaState - -> FunctionMap - -> Inline -> IO Inline -execInlineLuaFilter lua fnMap x = do - let tryFilter :: String -> IO Inline - tryFilter filterFnName = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x - let tryFilterAlternatives :: [String] -> IO Inline - tryFilterAlternatives [] = return x - tryFilterAlternatives (fnName : alternatives) = - case Map.lookup fnName fnMap of - Nothing -> tryFilterAlternatives alternatives - Just fn -> runLuaFilterFunction lua fn x - case x of - Cite _ _ -> tryFilter "Cite" - Code _ _ -> tryFilter "Code" - Emph _ -> tryFilter "Emph" - Image _ _ _ -> tryFilter "Image" - LineBreak -> tryFilter "LineBreak" - Link _ _ _ -> tryFilter "Link" - Math DisplayMath _ -> tryFilterAlternatives ["DisplayMath", "Math"] - Math InlineMath _ -> tryFilterAlternatives ["InlineMath", "Math"] - Note _ -> tryFilter "Note" - Quoted DoubleQuote _ -> tryFilterAlternatives ["DoubleQuoted", "Quoted"] - Quoted SingleQuote _ -> tryFilterAlternatives ["SingleQuoted", "Quoted"] - RawInline _ _ -> tryFilter "RawInline" - SmallCaps _ -> tryFilter "SmallCaps" - SoftBreak -> tryFilter "SoftBreak" - Space -> tryFilter "Space" - Span _ _ -> tryFilter "Span" - Str _ -> tryFilter "Str" - Strikeout _ -> tryFilter "Strikeout" - Strong _ -> tryFilter "Strong" - Subscript _ -> tryFilter "Subscript" - Superscript _ -> tryFilter "Superscript" + Just fn -> runFilterFunction lua fn x instance StackValue LuaFilter where valuetype _ = Lua.TTABLE push = undefined peek lua idx = fmap (LuaFilter lua) <$> Lua.peek lua idx --- | Helper class for pushing a single value to the stack via a lua function. --- See @pushViaCall@. -class PushViaFilterFunction a where - pushViaFilterFunction' :: LuaState -> LuaFilterFunction -> IO () -> Int -> a - -instance StackValue a => PushViaFilterFunction (IO a) where - pushViaFilterFunction' lua lf pushArgs num = do - pushFilterFunction lua lf - pushArgs - Lua.call lua num 1 - mbres <- Lua.peek lua (-1) - case mbres of - Nothing -> error $ "Error while trying to get a filter's return " - ++ "value from lua stack." - Just res -> res <$ Lua.pop lua 1 - -instance (StackValue a, PushViaFilterFunction b) => - PushViaFilterFunction (a -> b) where - pushViaFilterFunction' lua lf pushArgs num x = - pushViaFilterFunction' lua lf (pushArgs *> push lua x) (num + 1) - --- | Push an value to the stack via a lua filter function. The function is --- called with all arguments that are passed to this function and is expected to --- return a single value. -runLuaFilterFunction :: PushViaFilterFunction a - => LuaState -> LuaFilterFunction -> a -runLuaFilterFunction lua lf = pushViaFilterFunction' lua lf (return ()) 0 +-- | Push a value to the stack via a lua filter function. The filter function is +-- called with given element as argument and is expected to return an element. +-- Alternatively, the function can return nothing or nil, in which case the +-- element is left unchanged. +runFilterFunction :: StackValue a => LuaState -> LuaFilterFunction -> a -> IO a +runFilterFunction lua lf x = do + pushFilterFunction lua lf + Lua.push lua x + z <- Lua.pcall lua 1 1 0 + if (z /= 0) + then do + msg <- Lua.peek lua (-1) + let prefix = "Error while running filter function: " + throwIO . LuaException $ + case msg of + Nothing -> prefix ++ "could not read error message" + Just msg' -> prefix ++ msg' + else do + resType <- Lua.ltype lua (-1) + case resType of + Lua.TNIL -> Lua.pop lua 1 *> return x + _ -> do + mbres <- Lua.peek lua (-1) + case mbres of + Nothing -> throwIO $ LuaException + ("Error while trying to get a filter's return " + ++ "value from lua stack.") + Just res -> res <$ Lua.pop lua 1 -- | Push the filter function to the top of the stack. pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO () -pushFilterFunction lua lf = do +pushFilterFunction lua lf = -- The function is stored in a lua registry table, retrieve it from there. - push lua ("PANDOC_FILTER_FUNCTIONS"::String) - Lua.rawget lua Lua.registryindex - Lua.rawgeti lua (-1) (functionIndex lf) - Lua.remove lua (-2) -- remove registry table from stack + Lua.rawgeti lua Lua.registryindex (functionIndex lf) + +registerFilterFunction :: LuaState -> Int -> IO LuaFilterFunction +registerFilterFunction lua idx = do + isFn <- Lua.isfunction lua idx + unless isFn . throwIO . LuaException $ "Not a function at index " ++ show idx + Lua.pushvalue lua idx + refIdx <- Lua.ref lua Lua.registryindex + return $ LuaFilterFunction refIdx instance StackValue LuaFilterFunction where valuetype _ = Lua.TFUNCTION - push lua v = pushFilterFunction lua v - peek lua i = do - isFn <- Lua.isfunction lua i - unless isFn (error $ "Not a function at index " ++ (show i)) - Lua.pushvalue lua i - push lua ("PANDOC_FILTER_FUNCTIONS"::String) - Lua.rawget lua Lua.registryindex - len <- Lua.objlen lua (-1) - Lua.insert lua (-2) - Lua.rawseti lua (-2) (len + 1) - Lua.pop lua 1 - return . Just $ LuaFilterFunction (len + 1) + push = pushFilterFunction + peek = fmap (fmap Just) . registerFilterFunction diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs index 27c19d4f0..2d0baf4f8 100644 --- a/src/Text/Pandoc/Lua/PandocModule.hs +++ b/src/Text/Pandoc/Lua/PandocModule.hs @@ -34,15 +34,16 @@ import Data.Text (pack) import Scripting.Lua (LuaState, call, push, pushhsfunction, rawset) import Text.Pandoc.Class hiding (readDataFile) import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Options (ReaderOptions(readerExtensions)) import Text.Pandoc.Lua.Compat (loadstring) import Text.Pandoc.Lua.StackInstances () import Text.Pandoc.Readers (Reader (..), getReader) import Text.Pandoc.Shared (readDataFile) -- | Push the "pandoc" on the lua stack. -pushPandocModule :: LuaState -> IO () -pushPandocModule lua = do - script <- pandocModuleScript +pushPandocModule :: Maybe FilePath -> LuaState -> IO () +pushPandocModule datadir lua = do + script <- pandocModuleScript datadir status <- loadstring lua script "pandoc.lua" unless (status /= 0) $ call lua 0 1 push lua "__read" @@ -50,17 +51,17 @@ pushPandocModule lua = do rawset lua (-3) -- | Get the string representation of the pandoc module -pandocModuleScript :: IO String -pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua" +pandocModuleScript :: Maybe FilePath -> IO String +pandocModuleScript datadir = unpack <$> readDataFile datadir "pandoc.lua" read_doc :: String -> String -> IO (Either String Pandoc) read_doc formatSpec content = do case getReader formatSpec of Left s -> return $ Left s - Right reader -> + Right (reader, es) -> case reader of TextReader r -> do - res <- runIO $ r def (pack content) + res <- runIO $ r def{ readerExtensions = es } (pack content) case res of Left s -> return . Left $ show s Right pd -> return $ Right pd diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c7211c86e..d7e77010e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -61,7 +61,6 @@ data ReaderOptions = ReaderOptions{ , readerStandalone :: Bool -- ^ Standalone document with header , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations @@ -75,7 +74,6 @@ instance Default ReaderOptions , readerStandalone = False , readerColumns = 80 , readerTabStop = 4 - , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerAbbreviations = defaultAbbrevs , readerDefaultImageExtension = "" @@ -213,6 +211,7 @@ data WriterOptions = WriterOptions , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) @@ -249,6 +248,7 @@ instance Default WriterOptions where , writerListings = False , writerHighlightStyle = Just pygments , writerSetextHeaders = True + , writerEpubSubdirectory = "EPUB" , writerEpubMetadata = Nothing , writerEpubFonts = [] , writerEpubChapterLevel = 1 diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index cd75d869d..25a94972a 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -40,7 +40,6 @@ import qualified Data.Text as T import Data.Text (Text) import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) -import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BC import Data.Maybe (fromMaybe) @@ -197,7 +196,22 @@ tex2pdf' verbosity args tmpDir program source = do _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf + (ExitSuccess, Just pdf) -> do + missingCharacterWarnings verbosity log' + return $ Right pdf + +missingCharacterWarnings :: Verbosity -> ByteString -> IO () +missingCharacterWarnings verbosity log' = do + let ls = BC.lines log' + let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " + let warnings = [ UTF8.toStringLazy (BC.drop 19 l) + | l <- ls + , isMissingCharacterWarning l + ] + runIO $ do + setVerbosity verbosity + mapM_ (report . MissingCharacter) warnings + return () -- parsing output @@ -255,12 +269,12 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do mapM_ print env'' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" - B.readFile file' >>= B.putStr + BL.readFile file' >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty when (verbosity >= INFO) $ do putStrLn $ "[makePDF] Run #" ++ show runNumber - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" if runNumber <= numRuns then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source @@ -271,9 +285,16 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - return (exit, out, pdf) + -- Note that some things like Missing character warnings + -- appear in the log but not on stderr, so we prefer the log: + let logFile = replaceExtension file ".log" + logExists <- doesFileExist logFile + log' <- if logExists + then BL.readFile logFile + else return out + return (exit, log', pdf) ms2pdf :: Verbosity -> [String] @@ -294,7 +315,7 @@ ms2pdf verbosity args source = do (exit, out) <- pipeProcess (Just env') "pdfroff" args (BL.fromStrict $ UTF8.fromText source) when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" return $ case exit of ExitFailure _ -> Left out @@ -318,12 +339,12 @@ html2pdf verbosity args source = do mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty removeFile file when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" pdfExists <- doesFileExist pdfFile mbPdf <- if pdfExists @@ -331,7 +352,7 @@ html2pdf verbosity args source = do -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. then do - res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile removeFile pdfFile return res else return Nothing @@ -365,11 +386,11 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env') "context" programArgs BL.empty when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" let pdfFile = replaceExtension file ".pdf" pdfExists <- doesFileExist pdfFile @@ -377,7 +398,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing case (exit, mbPdf) of (ExitFailure _, _) -> do diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cd51bff69..549042d14 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -35,7 +35,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( anyLine, +module Text.Pandoc.Parsing ( takeWhileP, + takeP, + anyLine, anyLineNewline, indentWith, many1Till, @@ -109,8 +111,6 @@ module Text.Pandoc.Parsing ( anyLine, dash, nested, citeKey, - macro, - applyMacros', Parser, ParserT, F, @@ -130,6 +130,7 @@ module Text.Pandoc.Parsing ( anyLine, runParser, runParserT, parse, + tokenPrim, anyToken, getInput, setInput, @@ -178,24 +179,27 @@ module Text.Pandoc.Parsing ( anyLine, sourceLine, setSourceColumn, setSourceLine, - newPos + newPos, + Line, + Column ) where +import Data.Text (Text) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..), trimInlines) +import Text.Pandoc.Builder (Blocks, Inlines, HasMeta(..), trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec hiding (token) -import Text.Parsec.Pos (newPos) +import Text.Parsec.Pos (newPos, initialPos, updatePosString) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace, isPunctuation ) import Data.List ( intercalate, transpose, isSuffixOf ) import Text.Pandoc.Shared import qualified Data.Map as M -import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, pMacroDefinition) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.HTML.TagSoup.Entity ( lookupEntity ) import Text.Pandoc.Asciify (toAsciiChar) import Data.Monoid ((<>)) @@ -242,6 +246,35 @@ instance Monoid a => Monoid (Future s a) where mappend = liftM2 mappend mconcat = liftM mconcat . sequence +-- | Parse characters while a predicate is true. +takeWhileP :: Stream [Char] m Char + => (Char -> Bool) -> ParserT [Char] st m [Char] +takeWhileP f = do + -- faster than 'many (satisfy f)' + inp <- getInput + pos <- getPosition + let (xs, rest) = span f inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + +-- Parse n characters of input (or the rest of the input if +-- there aren't n characters). +takeP :: Stream [Char] m Char => Int -> ParserT [Char] st m [Char] +takeP n = do + guard (n > 0) + -- faster than 'count n anyChar' + inp <- getInput + pos <- getPosition + let (xs, rest) = splitAt n inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do @@ -366,6 +399,7 @@ parseFromString :: Monad m -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition + setPosition $ initialPos "chunk" oldInput <- getInput setInput str result <- parser @@ -993,7 +1027,7 @@ data ParserState = ParserState stateIdentifiers :: Set.Set String, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateMacros :: [Macro], -- ^ List of macros defined so far + stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: @@ -1056,8 +1090,8 @@ instance HasIdentifierList ParserState where updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } class HasMacros st where - extractMacros :: st -> [Macro] - updateMacros :: ([Macro] -> [Macro]) -> st -> st + extractMacros :: st -> M.Map Text Macro + updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st instance HasMacros ParserState where extractMacros = stateMacros @@ -1111,7 +1145,7 @@ defaultParserState = stateIdentifiers = Set.empty, stateNextExample = 1, stateExamples = M.empty, - stateMacros = [], + stateMacros = M.empty, stateRstDefaultRole = "title-reference", stateRstCustomRoles = M.empty, stateCaption = Nothing, @@ -1340,33 +1374,6 @@ token :: (Stream s m t) -> ParsecT s st m a token pp pos match = tokenPrim pp (\_ t _ -> pos t) match --- --- Macros --- - --- | Parse a \newcommand or \newenviroment macro definition. -macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) - => ParserT [Char] st m Blocks -macro = do - apply <- getOption readerApplyMacros - (m, def') <- withRaw pMacroDefinition - if apply - then do - updateState $ \st -> updateMacros (m:) st - return mempty - else return $ rawBlock "latex" def' - --- | Apply current macros to string. -applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) - => String - -> ParserT [Char] st m String -applyMacros' target = do - apply <- getOption readerApplyMacros - if apply - then do macros <- extractMacros <$> getState - return $ applyMacros macros target - else return target - infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) @@ -1384,10 +1391,11 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st, Functor mf, Applicative mf, Monad mf) - => ParserT String st m (mf Blocks) + => ParserT [a] st m (mf Blocks) + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m (mf Blocks) -insertIncludedFile' blocks dirs f = do + -> ParserT [a] st m (mf Blocks) +insertIncludedFile' blocks totoks dirs f = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState @@ -1401,7 +1409,7 @@ insertIncludedFile' blocks dirs f = do report $ CouldNotLoadIncludeFile f oldPos return "" setPosition $ newPos f 1 1 - setInput contents + setInput $ totoks contents bs <- blocks setInput oldInput setPosition oldPos @@ -1411,11 +1419,12 @@ insertIncludedFile' blocks dirs f = do -- | Parse content of include file as blocks. Circular includes result in an -- @PandocParseError@. insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT String st m Blocks + => ParserT [a] st m Blocks + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m Blocks -insertIncludedFile blocks dirs f = - runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f + -> ParserT [a] st m Blocks +insertIncludedFile blocks totoks dirs f = + runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. @@ -1423,4 +1432,4 @@ insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) => ParserT String st m (Future st Blocks) -> [FilePath] -> FilePath -> ParserT String st m (Future st Blocks) -insertIncludedFileF = insertIncludedFile' +insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index d78a2f1d9..1b3c647a1 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -77,7 +77,7 @@ module Text.Pandoc.Pretty ( ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace) import Data.Foldable (toList) import Data.List (intersperse) diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 004fefe25..0374d27d5 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -46,6 +46,7 @@ module Text.Pandoc.Readers , readMarkdown , readCommonMark , readMediaWiki + , readVimwiki , readRST , readOrg , readLaTeX @@ -59,6 +60,7 @@ module Text.Pandoc.Readers , readTWiki , readTxt2Tags , readEPUB + , readMuse -- * Miscellaneous , getReader , getDefaultExtensions @@ -81,6 +83,8 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Vimwiki +import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Odt import Text.Pandoc.Readers.OPML @@ -113,6 +117,7 @@ readers = [ ("native" , TextReader readNative) ,("commonmark" , TextReader readCommonMark) ,("rst" , TextReader readRST) ,("mediawiki" , TextReader readMediaWiki) + ,("vimwiki" , TextReader readVimwiki) ,("docbook" , TextReader readDocBook) ,("opml" , TextReader readOPML) ,("org" , TextReader readOrg) @@ -125,22 +130,19 @@ readers = [ ("native" , TextReader readNative) ,("odt" , ByteStringReader readOdt) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) ] --- | Retrieve reader based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> Either String (Reader m) +-- | Retrieve reader, extensions based on formatSpec (format+extensions). +getReader :: PandocMonad m => String -> Either String (Reader m, Extensions) getReader s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName - Just (TextReader r) -> Right $ TextReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } + Just r -> Right (r, setExts $ + getDefaultExtensions readerName) -- | Read pandoc document from JSON format. readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index bd3c7c356..c1e4d742c 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, crFilter) import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder @@ -9,7 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Data.Either (rights) import Data.Generics import Data.Char (isSpace) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) @@ -526,7 +526,8 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp + let tree = normalizeTree . parseXML . handleInstructions + $ T.unpack $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2757314ab..21aa358f2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -76,7 +76,7 @@ module Text.Pandoc.Readers.Docx import Codec.Archive.Zip import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Default (Default) import Data.List (delete, intersect) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index e6736100f..24615ba94 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -58,7 +58,7 @@ import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import Data.Char (chr, isDigit, ord, readLitChar) diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 38f976fd8..b32a73770 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -7,7 +7,7 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , hasStyleName ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (toLower) import qualified Data.Map as M import Text.Pandoc.Readers.Docx.Util diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 94f933c4d..734973e33 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, addMetaField - , escapeURI, safeRead ) + , escapeURI, safeRead, crFilter ) import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, Extension (Ext_epub_html_exts, Ext_raw_html, Ext_native_divs, Ext_native_spans)) @@ -53,6 +53,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M +import Data.Foldable ( for_ ) import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isPrefixOf ) import Data.Char ( isDigit, isLetter, isAlphaNum ) @@ -71,7 +72,7 @@ import Data.Monoid ((<>)) import Text.Parsec.Error import qualified Data.Set as Set import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Control.Monad.Except (throwError) -- | Convert HTML-formatted string to 'Pandoc' document. @@ -82,7 +83,7 @@ readHtml :: PandocMonad m readHtml opts inp = do let tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - inp + (crFilter inp) parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) type TagParser m = HTMLParser m [Tag Text] +pHtml :: PandocMonad m => TagParser m Blocks +pHtml = try $ do + (TagOpen "html" attr) <- lookAhead $ pAnyTag + for_ (lookup "lang" attr) $ + updateState . B.setMeta "lang" . B.text . T.unpack + pInTags "html" block + pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block @@ -162,7 +170,6 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag block :: PandocMonad m => TagParser m Blocks block = do - pos <- getPosition res <- choice [ eSection , eSwitch B.para block @@ -176,13 +183,14 @@ block = do , pList , pHrule , pTable + , pHtml , pHead , pBody , pDiv , pPlain , pRawHtmlBlock ] - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res namespaces :: PandocMonad m => [(String, TagParser m Inlines)] @@ -797,6 +805,8 @@ pCloses tagtype = try $ do (TagClose "dl") | tagtype == "dd" -> return () (TagClose "table") | tagtype == "td" -> return () (TagClose "table") | tagtype == "tr" -> return () + (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags + -> return () -- see #3794 _ -> mzero pTagText :: PandocMonad m => TagParser m Inlines diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index b22b71b96..a09ed8be9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -27,7 +27,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (splitBy, trim) +import Text.Pandoc.Shared (splitBy, trim, crFilter) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -35,7 +35,7 @@ readHaddock :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack s) of +readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 17fb48548..a9bafb03b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> @@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Portability : portable Conversion of LaTeX to 'Pandoc' document. + -} module Text.Pandoc.Readers.LaTeX ( readLaTeX, + applyMacros, rawLaTeXInline, rawLaTeXBlock, - inlineCommand, + macro, + inlineCommand ) where import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (chr, isAlphaNum, isLetter, ord) -import Data.Text (Text, unpack) +import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M +import qualified Data.Set as Set import Data.Maybe (fromMaybe, maybeToList) import Safe (minimumDef) import System.FilePath (addExtension, replaceExtension, takeExtension) @@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional, - space, (<|>)) +import Text.Pandoc.Parsing hiding (many, optional, withRaw, + mathInline, mathDisplay, + space, (<|>), spaces, blankline) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..), + TokType(..)) import Text.Pandoc.Walk +import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop)) + +-- for debugging: +-- import Text.Pandoc.Extensions (getDefaultExtensions) +-- import Text.Pandoc.Class (runIOorExplode, PandocIO) +-- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: PandocMonad m @@ -63,17 +81,18 @@ readLaTeX :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx) + parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" + (tokenize (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError e + Left e -> throwError $ PandocParsecError (T.unpack ltx) e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do bs <- blocks eof st <- getState - let meta = stateMeta st + let meta = sMeta st let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] @@ -87,177 +106,476 @@ parseLaTeX = do else id) doc' return $ Pandoc meta bs' -type LP m = ParserT String ParserState m - -anyControlSeq :: PandocMonad m => LP m String -anyControlSeq = do - char '\\' - next <- option '\n' anyChar - case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - -controlSeq :: PandocMonad m => String -> LP m String -controlSeq name = try $ do - char '\\' - case name of - "" -> mzero - [c] | not (isLetter c) -> string [c] - cs -> string cs <* notFollowedBy letter <* optional sp - return name - -dimenarg :: PandocMonad m => LP m String -dimenarg = try $ do - ch <- option "" $ string "=" - num <- many1 digit - dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - return $ ch ++ num ++ dim +-- testParser :: LP PandocIO a -> Text -> IO a +-- testParser p t = do +-- res <- runIOorExplode (runParserT p defaultLaTeXState{ +-- sOptions = def{ readerExtensions = +-- enableExtension Ext_raw_tex $ +-- getDefaultExtensions "latex" }} "source" (tokenize t)) +-- case res of +-- Left e -> error (show e) +-- Right r -> return r + +data LaTeXState = LaTeXState{ sOptions :: ReaderOptions + , sMeta :: Meta + , sQuoteContext :: QuoteContext + , sMacros :: M.Map Text Macro + , sContainers :: [String] + , sHeaders :: M.Map Inlines String + , sLogMessages :: [LogMessage] + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: Maybe Inlines + , sInListItem :: Bool + , sInTableCell :: Bool + } + deriving Show + +defaultLaTeXState :: LaTeXState +defaultLaTeXState = LaTeXState{ sOptions = def + , sMeta = nullMeta + , sQuoteContext = NoQuote + , sMacros = M.empty + , sContainers = [] + , sHeaders = M.empty + , sLogMessages = [] + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = Nothing + , sInListItem = False + , sInTableCell = False + } + +instance PandocMonad m => HasQuoteContext LaTeXState m where + getQuoteContext = sQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = sQuoteContext oldState + setState oldState { sQuoteContext = context } + result <- parser + newState <- getState + setState newState { sQuoteContext = oldQuoteContext } + return result + +instance HasLogMessages LaTeXState where + addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } + getLogMessages st = reverse $ sLogMessages st + +instance HasIdentifierList LaTeXState where + extractIdentifierList = sIdentifiers + updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } + +instance HasIncludeFiles LaTeXState where + getIncludeFiles = sContainers + addIncludeFile f s = s{ sContainers = f : sContainers s } + dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } + +instance HasHeaderMap LaTeXState where + extractHeaderMap = sHeaders + updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } + +instance HasMacros LaTeXState where + extractMacros st = sMacros st + updateMacros f st = st{ sMacros = f (sMacros st) } + +instance HasReaderOptions LaTeXState where + extractReaderOptions = sOptions + +instance HasMeta LaTeXState where + setMeta field val st = + st{ sMeta = setMeta field val $ sMeta st } + deleteMeta field st = + st{ sMeta = deleteMeta field $ sMeta st } + +instance Default LaTeXState where + def = defaultLaTeXState + +type LP m = ParserT [Tok] LaTeXState m + +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXBlock = do + lookAhead (try (char '\\' >> letter)) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- try $ + withRaw (environment <|> macroDef <|> blockCommand) + return raw + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right raw -> takeP (T.length (untokenize raw)) + +macro :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m Blocks +macro = do + guardEnabled Ext_latex_macros + lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *> + oneOfStrings ["command", "environment"]) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- withRaw $ try macroDef + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right (raw, st) -> do + updateState (updateMacros (const $ sMacros st)) + mempty <$ takeP (T.length (untokenize raw)) + +applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => String -> ParserT String s m String +applyMacros s = do + (guardEnabled Ext_latex_macros >> + do let retokenize = doMacros 0 *> (toksToString <$> getInput) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT retokenize lstate "math" (tokenize (T.pack s)) + case res of + Left e -> fail (show e) + Right s' -> return s') <|> return s + +rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXInline = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + takeP (T.length (untokenize raw)) + +inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (il, raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (il, raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + takeP (T.length (untokenize raw)) + return il + +tokenize :: Text -> [Tok] +tokenize = totoks (1, 1) + +totoks :: (Line, Column) -> Text -> [Tok] +totoks (lin,col) t = + case T.uncons t of + Nothing -> [] + Just (c, rest) + | c == '\n' -> + Tok (lin, col) Newline "\n" + : totoks (lin + 1,1) rest + | isSpaceOrTab c -> + let (sps, rest') = T.span isSpaceOrTab t + in Tok (lin, col) Spaces sps + : totoks (lin, col + T.length sps) rest' + | isAlphaNum c -> + let (ws, rest') = T.span isAlphaNum t + in Tok (lin, col) Word ws + : totoks (lin, col + T.length ws) rest' + | c == '%' -> + let (cs, rest') = T.break (== '\n') rest + in Tok (lin, col) Comment ("%" <> cs) + : totoks (lin, col + 1 + T.length cs) rest' + | c == '\\' -> + case T.uncons rest of + Nothing -> [Tok (lin, col) Symbol (T.singleton c)] + Just (d, rest') + | isLetter d -> + let (ws, rest'') = T.span isLetter rest + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (lin, + col + 1 + T.length ws + T.length ss) rest''' + | d == '\t' || d == '\n' -> + Tok (lin, col) Symbol ("\\") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d]) + : totoks (lin, col + 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok (lin, col) (Arg i) ("#" <> t1) + : totoks (lin, col + 1 + T.length t1) t2 + Nothing -> + Tok (lin, col) Symbol ("#") + : totoks (lin, col + 1) t2 + | c == '^' -> + case T.uncons rest of + Just ('^', rest') -> + case T.uncons rest' of + Just (d, rest'') + | isLowerHex d -> + case T.uncons rest'' of + Just (e, rest''') | isLowerHex e -> + Tok (lin, col) Esc2 (T.pack ['^','^',d,e]) + : totoks (lin, col + 4) rest''' + _ -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + | d < '\128' -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + _ -> [Tok (lin, col) Symbol ("^"), + Tok (lin, col + 1) Symbol ("^")] + _ -> Tok (lin, col) Symbol ("^") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest + + where isSpaceOrTab ' ' = True + isSpaceOrTab '\t' = True + isSpaceOrTab _ = False + +isLowerHex :: Char -> Bool +isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' + +untokenize :: [Tok] -> Text +untokenize = mconcat . map untoken + +untoken :: Tok -> Text +untoken (Tok _ _ t) = t + +satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok +satisfyTok f = + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res + where matcher t | f t = Just t + | otherwise = Nothing + updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos + updatePos spos _ (Tok (lin,col) _ _ : _) = + setSourceColumn (setSourceLine spos lin) col + updatePos spos _ [] = spos + +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + verbatimMode <- sVerbatimMode <$> getState + when (not verbatimMode) $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro numargs optarg newtoks) -> do + setInput ts + let getarg = spaces >> braced + args <- case optarg of + Nothing -> count numargs getarg + Just o -> + (:) <$> option o bracketedToks + <*> count (numargs - 1) getarg + let addTok (Tok _ (Arg i) _) acc | i > 0 + , i <= numargs = + map (setpos spos) (args !! (i - 1)) ++ acc + addTok t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr addTok ts' newtoks + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) + +setpos :: (Line, Column) -> Tok -> Tok +setpos spos (Tok _ tt txt) = Tok spos tt txt + +anyControlSeq :: PandocMonad m => LP m Tok +anyControlSeq = satisfyTok isCtrlSeq + where isCtrlSeq (Tok _ (CtrlSeq _) _) = True + isCtrlSeq _ = False + +anySymbol :: PandocMonad m => LP m Tok +anySymbol = satisfyTok isSym + where isSym (Tok _ Symbol _) = True + isSym _ = False + +spaces :: PandocMonad m => LP m () +spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +spaces1 :: PandocMonad m => LP m () +spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +tokTypeIn :: [TokType] -> Tok -> Bool +tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes + +controlSeq :: PandocMonad m => Text -> LP m Tok +controlSeq name = satisfyTok isNamed + where isNamed (Tok _ (CtrlSeq n) _) = n == name + isNamed _ = False + +symbol :: PandocMonad m => Char -> LP m Tok +symbol c = satisfyTok isc + where isc (Tok _ Symbol d) = case T.uncons d of + Just (c',_) -> c == c' + _ -> False + isc _ = False + +symbolIn :: PandocMonad m => [Char] -> LP m Tok +symbolIn cs = satisfyTok isInCs + where isInCs (Tok _ Symbol d) = case T.uncons d of + Just (c,_) -> c `elem` cs + _ -> False + isInCs _ = False sp :: PandocMonad m => LP m () sp = whitespace <|> endline whitespace :: PandocMonad m => LP m () -whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - -endline :: PandocMonad m => LP m () -endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) +whitespace = () <$ satisfyTok isSpaceTok + where isSpaceTok (Tok _ Spaces _) = True + isSpaceTok _ = False -isLowerHex :: Char -> Bool -isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' +newlineTok :: PandocMonad m => LP m () +newlineTok = () <$ satisfyTok isNewlineTok -tildeEscape :: PandocMonad m => LP m Char -tildeEscape = try $ do - string "^^" - c <- satisfy (\x -> x >= '\0' && x <= '\128') - d <- if isLowerHex c - then option "" $ count 1 (satisfy isLowerHex) - else return "" - if null d - then case ord c of - x | x >= 64 && x <= 127 -> return $ chr (x - 64) - | otherwise -> return $ chr (x + 64) - else return $ chr $ read ('0':'x':c:d) +isNewlineTok :: Tok -> Bool +isNewlineTok (Tok _ Newline _) = True +isNewlineTok _ = False comment :: PandocMonad m => LP m () -comment = do - char '%' - skipMany (satisfy (/='\n')) - optional newline - return () +comment = () <$ satisfyTok isCommentTok + where isCommentTok (Tok _ Comment _) = True + isCommentTok _ = False + +anyTok :: PandocMonad m => LP m Tok +anyTok = satisfyTok (const True) -bgroup :: PandocMonad m => LP m () +endline :: PandocMonad m => LP m () +endline = try $ do + newlineTok + lookAhead anyTok + notFollowedBy blankline + +blankline :: PandocMonad m => LP m () +blankline = try $ skipMany whitespace *> newlineTok + +primEscape :: PandocMonad m => LP m Char +primEscape = do + Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) + case toktype of + Esc1 -> case T.uncons (T.drop 2 t) of + Just (c, _) + | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) + | otherwise -> return (chr (ord c + 64)) + Nothing -> fail "Empty content of Esc1" + Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Just x -> return (chr x) + Nothing -> fail $ "Could not read: " ++ T.unpack t + _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen + +bgroup :: PandocMonad m => LP m Tok bgroup = try $ do - skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) - () <$ char '{' - <|> () <$ controlSeq "bgroup" - <|> () <$ controlSeq "begingroup" + skipMany sp + symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" -egroup :: PandocMonad m => LP m () -egroup = () <$ char '}' - <|> () <$ controlSeq "egroup" - <|> () <$ controlSeq "endgroup" +egroup :: PandocMonad m => LP m Tok +egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup") -grouped :: PandocMonad m => Monoid a => LP m a -> LP m a +grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do bgroup -- first we check for an inner 'grouped', because -- {{a,b}} should be parsed the same as {a,b} - try (grouped parser <* egroup) - <|> (mconcat <$> manyTill parser egroup) - -braced :: PandocMonad m => LP m String -braced = grouped chunk - where chunk = - many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) - <|> try (string "\\}") - <|> try (string "\\{") - <|> try (string "\\\\") - <|> ((\x -> "{" ++ x ++ "}") <$> braced) - <|> count 1 anyChar + try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) + +braced :: PandocMonad m => LP m [Tok] +braced = bgroup *> braced' 1 + where braced' (n :: Int) = + handleEgroup n <|> handleBgroup n <|> handleOther n + handleEgroup n = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' (n - 1) + handleBgroup n = do + t <- bgroup + (t:) <$> braced' (n + 1) + handleOther n = do + t <- anyTok + (t:) <$> braced' n bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a -bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) - -mathDisplay :: PandocMonad m => LP m String -> LP m Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) - -mathInline :: PandocMonad m => LP m String -> LP m Inlines -mathInline p = math <$> (try p >>= applyMacros') - -mathChars :: PandocMonad m => LP m String -mathChars = - concat <$> many (escapedChar - <|> (snd <$> withRaw braced) - <|> many1 (satisfy isOrdChar)) - where escapedChar = try $ do char '\\' - c <- anyChar - return ['\\',c] - isOrdChar '$' = False - isOrdChar '{' = False - isOrdChar '}' = False - isOrdChar '\\' = False - isOrdChar _ = True - -quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines -quoted' f starter ender = do - startchs <- starter - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then do - ils <- many (notFollowedBy ender >> inline) - (ender >> return (f (mconcat ils))) <|> - (<> mconcat ils) <$> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) - else lit startchs +bracketed parser = try $ do + symbol '[' + mconcat <$> manyTill parser (symbol ']') -doubleQuote :: PandocMonad m => LP m Inlines -doubleQuote = do - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +dimenarg :: PandocMonad m => LP m Text +dimenarg = try $ do + ch <- option False $ True <$ symbol '=' + Tok _ _ s <- satisfyTok isWordTok + guard $ (T.take 2 (T.reverse s)) `elem` + ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + let num = T.take (T.length s - 2) s + guard $ T.length num > 0 + guard $ T.all isDigit num + return $ T.pack ['=' | ch] <> s -singleQuote :: PandocMonad m => LP m Inlines -singleQuote = do - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - else str <$> many1 (oneOf "`\'‘’") +-- inline elements: -inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> inlineText - <|> inlineCommand - <|> inlineEnvironment - <|> inlineGroup - <|> (char '-' *> option (str "-") - (char '-' *> option (str "–") (str "—" <$ char '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (string "''")) - <|> (str "”" <$ char '”') - <|> (str "’" <$ char '\'') - <|> (str "’" <$ char '’') - <|> (str "\160" <$ char '~') - <|> mathDisplay (string "$$" *> mathChars <* string "$$") - <|> mathInline (char '$' *> mathChars <* char '$') - <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str . (:[]) <$> tildeEscape) - <|> (do res <- oneOf "#&~^'`\"[]" - pos <- getPosition - report $ ParsingUnescaped [res] pos - return $ str [res]) +word :: PandocMonad m => LP m Inlines +word = (str . T.unpack . untoken) <$> satisfyTok isWordTok -inlines :: PandocMonad m => LP m Inlines -inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) +regularSymbol :: PandocMonad m => LP m Inlines +regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol + where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t + isRegularSymbol _ = False + isSpecial c = c `Set.member` specialChars + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" + +isWordTok :: Tok -> Bool +isWordTok (Tok _ Word _) = True +isWordTok _ = False inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do @@ -268,467 +586,19 @@ inlineGroup = do -- we need the span so we can detitlecase bibtex entries; -- we need to know when something is {C}apitalized -block :: PandocMonad m => LP m Blocks -block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) - <|> environment - <|> include - <|> macro - <|> blockCommand - <|> paragraph - <|> grouped block - -blocks :: PandocMonad m => LP m Blocks -blocks = mconcat <$> many block - -getRawCommand :: PandocMonad m => String -> LP m String -getRawCommand name' = do - rawargs <- withRaw (many (try (optional sp *> opt)) *> - option "" (try (optional sp *> dimenarg)) *> - many braced) - return $ '\\' : name' ++ snd rawargs - -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v -lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l - -blockCommand :: PandocMonad m => LP m Blocks -blockCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*" <* optional sp) - let name' = name ++ star - let raw = do - rawcommand <- getRawCommand name' - transformed <- applyMacros' rawcommand - guard $ transformed /= rawcommand - notFollowedBy $ parseFromString' inlines transformed - parseFromString' blocks transformed - lookupListDefault raw [name',name] blockCommands - -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" - --- eat an optional argument and one or more arguments in braces -ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines) -ignoreInlines name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawInline "latex" rawCommand) - doraw <|> ignore rawCommand - -guardRaw :: PandocMonad m => LP m () -guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex - -optargs :: PandocMonad m => LP m String -optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced)) - -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a -ignore raw = do - pos <- getPosition - report $ SkippedContent raw pos - return mempty - -ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) -ignoreBlocks name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawBlock "latex" rawCommand) - doraw <|> ignore rawCommand - -blockCommands :: PandocMonad m => M.Map String (LP m Blocks) -blockCommands = M.fromList $ - [ ("par", mempty <$ skipopts) - , ("parbox", braced >> grouped blocks) - , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) - , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) - , ("author", mempty <$ (skipopts *> authors)) - -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) - , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - -- Koma-script metadata commands - , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) - -- sectioning - , ("part", section nullAttr (-1)) - , ("part*", section nullAttr (-1)) - , ("chapter", section nullAttr 0) - , ("chapter*", section ("",["unnumbered"],[]) 0) - , ("section", section nullAttr 1) - , ("section*", section ("",["unnumbered"],[]) 1) - , ("subsection", section nullAttr 2) - , ("subsection*", section ("",["unnumbered"],[]) 2) - , ("subsubsection", section nullAttr 3) - , ("subsubsection*", section ("",["unnumbered"],[]) 3) - , ("paragraph", section nullAttr 4) - , ("paragraph*", section ("",["unnumbered"],[]) 4) - , ("subparagraph", section nullAttr 5) - , ("subparagraph*", section ("",["unnumbered"],[]) 5) - -- beamer slides - , ("frametitle", section nullAttr 3) - , ("framesubtitle", section nullAttr 4) - -- letters - , ("opening", (para . trimInlines) <$> (skipopts *> tok)) - , ("closing", skipopts *> closing) - -- - , ("hrule", pure horizontalRule) - , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> looseItem) - , ("documentclass", skipopts *> braced *> preamble) - , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> setCaption) - , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - -- includes - , ("lstinputlisting", inputListing) - , ("graphicspath", graphicsPath) - -- hyperlink - , ("hypertarget", braced >> grouped block) - -- LaTeX colors - , ("textcolor", coloredBlock "color") - , ("colorbox", coloredBlock "background-color") - ] ++ map ignoreBlocks - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" - , "bibliographystyle" - , "maketitle", "makeindex", "makeglossary" - , "addcontentsline", "addtocontents", "addtocounter" - -- \ignore{} is used conventionally in literate haskell for definitions - -- that are to be processed by the compiler but not printed. - , "ignore" - , "hyperdef" - , "markboth", "markright", "markleft" - , "hspace", "vspace" - , "newpage" - , "clearpage" - , "pagebreak" - ] - -coloredBlock :: PandocMonad m => String -> LP m Blocks -coloredBlock stylename = do - skipopts - color <- braced - let constructor = divWith ("",[],[("style",stylename ++ ": " ++ color)]) - inlineContents <|> constructor <$> blockContents - where inlineContents = do - ils <- grouped inline - rest <- inlines - return (para (ils <> rest)) - blockContents = grouped block - -graphicsPath :: PandocMonad m => LP m Blocks -graphicsPath = do - ps <- bgroup *> (manyTill braced egroup) - getResourcePath >>= setResourcePath . (++ ps) - return mempty - -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ stateMeta = addMetaField field val $ stateMeta st } - -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') - -setCaption :: PandocMonad m => LP m Blocks -setCaption = do - ils <- tok - mblabel <- option Nothing $ - try $ spaces' >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("data-label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ stateCaption = Just ils' } - return mempty - -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ stateCaption = Nothing } - -authors :: PandocMonad m => LP m () -authors = try $ do - bgroup - let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> - (inline <|> mempty <$ blockCommand)) - -- skip e.g. \vspace{10pt} - auths <- sepBy oneAuthor (controlSeq "and") - egroup - addMeta "author" (map trimInlines auths) - -section :: PandocMonad m => Attr -> Int -> LP m Blocks -section (ident, classes, kvs) lvl = do - skipopts - contents <- grouped inline - lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) - attr' <- registerHeader (lab, classes, kvs) contents - return $ headerWith attr' lvl contents - -inlineCommand :: PandocMonad m => LP m Inlines -inlineCommand = try $ do - (name, raw') <- withRaw anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*") - let name' = name ++ star - let raw = do - guard $ not (isBlockCommand name) - rawargs <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = raw' ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - exts <- getOption readerExtensions - if transformed /= rawcommand - then parseFromString' inlines transformed - else if extensionEnabled Ext_raw_tex exts - then return $ rawInline "latex" rawcommand - else ignore rawcommand - (lookupListDefault raw [name',name] inlineCommands <* - optional (try (string "{}"))) - -rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines -rawInlineOr name' fallback = do - parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions - if parseRaw - then rawInline "latex" <$> getRawCommand name' - else fallback - -isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) - - -inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) -inlineEnvironments = M.fromList - [ ("displaymath", mathEnvWith id Nothing "displaymath") - , ("math", math <$> mathEnv "math") - , ("equation", mathEnvWith id Nothing "equation") - , ("equation*", mathEnvWith id Nothing "equation*") - , ("gather", mathEnvWith id (Just "gathered") "gather") - , ("gather*", mathEnvWith id (Just "gathered") "gather*") - , ("multline", mathEnvWith id (Just "gathered") "multline") - , ("multline*", mathEnvWith id (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") - , ("align", mathEnvWith id (Just "aligned") "align") - , ("align*", mathEnvWith id (Just "aligned") "align*") - , ("alignat", mathEnvWith id (Just "aligned") "alignat") - , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") - ] - -inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) -inlineCommands = M.fromList $ - [ ("emph", extractSpaces emph <$> tok) - , ("textit", extractSpaces emph <$> tok) - , ("textsl", extractSpaces emph <$> tok) - , ("textsc", extractSpaces smallcaps <$> tok) - , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) - , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) - , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) - , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) - , ("texttt", ttfamily) - , ("sout", extractSpaces strikeout <$> tok) - , ("textsuperscript", extractSpaces superscript <$> tok) - , ("textsubscript", extractSpaces subscript <$> tok) - , ("textbackslash", lit "\\") - , ("backslash", lit "\\") - , ("slash", lit "/") - , ("textbf", extractSpaces strong <$> tok) - , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) - , ("ldots", lit "…") - , ("vdots", lit "\8942") - , ("dots", lit "…") - , ("mdots", lit "…") - , ("sim", lit "~") - , ("label", rawInlineOr "label" (inBrackets <$> tok)) - , ("ref", rawInlineOr "ref" (inBrackets <$> tok)) - , ("textgreek", tok) - , ("sep", lit ",") - , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty - , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) - , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline braced) - , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) - , ("P", lit "¶") - , ("S", lit "§") - , ("$", lit "$") - , ("%", lit "%") - , ("&", lit "&") - , ("#", lit "#") - , ("_", lit "_") - , ("{", lit "{") - , ("}", lit "}") - -- old TeX commands - , ("em", extractSpaces emph <$> inlines) - , ("it", extractSpaces emph <$> inlines) - , ("sl", extractSpaces emph <$> inlines) - , ("bf", extractSpaces strong <$> inlines) - , ("rm", inlines) - , ("itshape", extractSpaces emph <$> inlines) - , ("slshape", extractSpaces emph <$> inlines) - , ("scshape", extractSpaces smallcaps <$> inlines) - , ("bfseries", extractSpaces strong <$> inlines) - , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", try $ tok >>= accent hungarumlaut) - , ("`", option (str "`") $ try $ tok >>= accent grave) - , ("'", option (str "'") $ try $ tok >>= accent acute) - , ("^", option (str "^") $ try $ tok >>= accent circ) - , ("~", option (str "~") $ try $ tok >>= accent tilde) - , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) - , (".", option (str ".") $ try $ tok >>= accent dot) - , ("=", option (str "=") $ try $ tok >>= accent macron) - , ("c", option (str "c") $ try $ tok >>= accent cedilla) - , ("v", option (str "v") $ try $ tok >>= accent hacek) - , ("u", option (str "u") $ try $ tok >>= accent breve) - , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) - , (",", lit "\8198") - , ("@", pure mempty) - , (" ", lit "\160") - , ("ps", pure $ str "PS." <> space) - , ("TeX", lit "TeX") - , ("LaTeX", lit "LaTeX") - , ("bar", lit "|") - , ("textless", lit "<") - , ("textgreater", lit ">") - , ("thanks", note <$> grouped block) - , ("footnote", note <$> grouped block) - , ("verb", doverb) - , ("lstinline", dolstinline) - , ("Verb", doverb) - , ("url", (unescapeURL <$> braced) >>= \url -> - pure (link url "" (str url))) - , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> - tok >>= \lab -> - pure (link url "" lab)) - , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . removeDoubleQuotes <$> braced - mkImage options src) - , ("enquote", enquote) - , ("cite", citation "cite" NormalCitation False) - , ("Cite", citation "Cite" NormalCitation False) - , ("citep", citation "citep" NormalCitation False) - , ("citep*", citation "citep*" NormalCitation False) - , ("citeal", citation "citeal" NormalCitation False) - , ("citealp", citation "citealp" NormalCitation False) - , ("citealp*", citation "citealp*" NormalCitation False) - , ("autocite", citation "autocite" NormalCitation False) - , ("smartcite", citation "smartcite" NormalCitation False) - , ("footcite", inNote <$> citation "footcite" NormalCitation False) - , ("parencite", citation "parencite" NormalCitation False) - , ("supercite", citation "supercite" NormalCitation False) - , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) - , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) - , ("citeyear", citation "citeyear" SuppressAuthor False) - , ("autocite*", citation "autocite*" SuppressAuthor False) - , ("cite*", citation "cite*" SuppressAuthor False) - , ("parencite*", citation "parencite*" SuppressAuthor False) - , ("textcite", citation "textcite" AuthorInText False) - , ("citet", citation "citet" AuthorInText False) - , ("citet*", citation "citet*" AuthorInText False) - , ("citealt", citation "citealt" AuthorInText False) - , ("citealt*", citation "citealt*" AuthorInText False) - , ("textcites", citation "textcites" AuthorInText True) - , ("cites", citation "cites" NormalCitation True) - , ("autocites", citation "autocites" NormalCitation True) - , ("footcites", inNote <$> citation "footcites" NormalCitation True) - , ("parencites", citation "parencites" NormalCitation True) - , ("supercites", citation "supercites" NormalCitation True) - , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) - , ("Autocite", citation "Autocite" NormalCitation False) - , ("Smartcite", citation "Smartcite" NormalCitation False) - , ("Footcite", citation "Footcite" NormalCitation False) - , ("Parencite", citation "Parencite" NormalCitation False) - , ("Supercite", citation "Supercite" NormalCitation False) - , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) - , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) - , ("Citeyear", citation "Citeyear" SuppressAuthor False) - , ("Autocite*", citation "Autocite*" SuppressAuthor False) - , ("Cite*", citation "Cite*" SuppressAuthor False) - , ("Parencite*", citation "Parencite*" SuppressAuthor False) - , ("Textcite", citation "Textcite" AuthorInText False) - , ("Textcites", citation "Textcites" AuthorInText True) - , ("Cites", citation "Cites" NormalCitation True) - , ("Autocites", citation "Autocites" NormalCitation True) - , ("Footcites", citation "Footcites" NormalCitation True) - , ("Parencites", citation "Parencites" NormalCitation True) - , ("Supercites", citation "Supercites" NormalCitation True) - , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) - , ("citetext", complexNatbibCitation NormalCitation) - , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> - complexNatbibCitation AuthorInText) - <|> citation "citeauthor" AuthorInText False) - , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= - addMeta "nocite")) - , ("hypertarget", braced >> tok) - -- siuntix - , ("SI", dosiunitx) - -- hyphenat - , ("bshyp", lit "\\\173") - , ("fshyp", lit "/\173") - , ("dothyp", lit ".\173") - , ("colonhyp", lit ":\173") - , ("hyp", lit "-") - , ("nohyphens", tok) - , ("textnhtt", ttfamily) - , ("nhttfamily", ttfamily) - -- LaTeX colors - , ("textcolor", coloredInline "color") - , ("colorbox", coloredInline "background-color") - -- fontawesome - , ("faCheck", lit "\10003") - , ("faClose", lit "\10007") - ] ++ map ignoreInlines - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks: - [ "index" - , "hspace" - , "vspace" - , "newpage" - , "clearpage" - , "pagebreak" - ] - -coloredInline :: PandocMonad m => String -> LP m Inlines -coloredInline stylename = do - skipopts - color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ color)]) <$> tok - -ttfamily :: PandocMonad m => LP m Inlines -ttfamily = (code . stringify . toList) <$> tok +doLHSverb :: PandocMonad m => LP m Inlines +doLHSverb = + (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines mkImage options src = do - let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") - _ -> (k, v) - let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let replaceTextwidth (k,v) = + case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth + $ filter (\(k,_) -> k `elem` ["width", "height"]) options let attr = ("",[], kvs) let alt = str "image" case takeExtension src of @@ -737,56 +607,131 @@ mkImage options src = do return $ imageWith attr (addExtension src defaultExt) "" alt _ -> return $ imageWith attr src "" alt -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." +-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" +dosiunitx :: PandocMonad m => LP m Inlines +dosiunitx = do + skipopts + value <- tok + valueprefix <- option "" $ bracketed tok + unit <- tok + let emptyOr160 "" = "" + emptyOr160 _ = "\160" + return . mconcat $ [valueprefix, + emptyOr160 valueprefix, + value, + emptyOr160 unit, + unit] -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" +lit :: String -> LP m Inlines +lit = pure . str + +removeDoubleQuotes :: Text -> Text +removeDoubleQuotes t = + maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" + +doubleQuote :: PandocMonad m => LP m Inlines +doubleQuote = do + quoted' doubleQuoted (try $ count 2 $ symbol '`') + (void $ try $ count 2 $ symbol '\'') + <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`']) + (void $ try $ sequence [symbol '"', symbol '\'']) + <|> quoted' doubleQuoted ((:[]) <$> symbol '"') + (void $ symbol '"') + +singleQuote :: PandocMonad m => LP m Inlines +singleQuote = do + quoted' singleQuoted ((:[]) <$> symbol '`') + (try $ symbol '\'' >> + notFollowedBy (satisfyTok startsWithLetter)) + <|> quoted' singleQuoted ((:[]) <$> symbol '‘') + (try $ symbol '’' >> + notFollowedBy (satisfyTok startsWithLetter)) + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + +quoted' :: PandocMonad m + => (Inlines -> Inlines) + -> LP m [Tok] + -> LP m () + -> LP m Inlines +quoted' f starter ender = do + startchs <- (T.unpack . untokenize) <$> starter + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions + if smart + then do + ils <- many (notFollowedBy ender >> inline) + (ender >> return (f (mconcat ils))) <|> + (<> mconcat ils) <$> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + cs -> cs) + else lit startchs enquote :: PandocMonad m => LP m Inlines enquote = do skipopts - context <- stateQuoteContext <$> getState - if context == InDoubleQuote + quoteContext <- sQuoteContext <$> getState + if quoteContext == InDoubleQuote then singleQuoted <$> withQuoteContext InSingleQuote tok else doubleQuoted <$> withQuoteContext InDoubleQuote tok doverb :: PandocMonad m => LP m Inlines doverb = do - marker <- anyChar - code <$> manyTill (satisfy (/='\n')) (char marker) + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + (code . T.unpack . untokenize) <$> + manyTill (verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok) + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar) + : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp + return $ Tok (lin, col) toktype t1 dolstinline :: PandocMonad m => LP m Inlines dolstinline = do options <- option [] keyvals let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage - marker <- char '{' <|> anyChar - codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker) + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + (codeWith ("",classes,[]) . T.unpack . untokenize) <$> + manyTill (verbTok stopchar) (symbol stopchar) -doLHSverb :: PandocMonad m => LP m Inlines -doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') - --- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" -dosiunitx :: PandocMonad m => LP m Inlines -dosiunitx = do - skipopts - value <- tok - valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']')) - unit <- tok - let emptyOr160 "" = "" - emptyOr160 _ = "\160" - return . mconcat $ [valueprefix, - emptyOr160 valueprefix, - value, - emptyOr160 unit, - unit] +keyval :: PandocMonad m => LP m (String, String) +keyval = try $ do + Tok _ Word key <- satisfyTok isWordTok + let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"] + isSpecSym _ = False + val <- option [] $ do + symbol '=' + braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym + <|> anyControlSeq)) + optional sp + optional (symbol ',') + optional sp + return (T.unpack key, T.unpack . untokenize $ val) -lit :: String -> LP m Inlines -lit = pure . str +keyvals :: PandocMonad m => LP m [(String, String)] +keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') accent :: (Char -> String) -> Inlines -> LP m Inlines accent f ils = @@ -994,18 +939,149 @@ breve 'U' = "Ŭ" breve 'u' = "ŭ" breve c = [c] +toksToString :: [Tok] -> String +toksToString = T.unpack . untokenize + +mathDisplay :: String -> Inlines +mathDisplay = displayMath . trim + +mathInline :: String -> Inlines +mathInline = math . trim + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + contents <- trim . toksToString <$> + many (notFollowedBy (symbol '$') >> anyTok) + if display + then do + mathDisplay contents <$ try (symbol '$' >> symbol '$') + <|> (guard (null contents) >> return (mathInline "")) + else mathInline contents <$ (symbol '$') + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: PandocMonad m => LP m [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> (manyTill citationLabel egroup) + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + +citationLabel :: PandocMonad m => LP m String +citationLabel = do + optional sp + toksToString <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* optional sp + <* optional (symbol ',') + <* optional sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char] + +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw)) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines +complexNatbibCitation mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` (symbol ';') + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ toksToString raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + +inlineCommand' :: PandocMonad m => LP m Inlines +inlineCommand' = try $ do + Tok _ (CtrlSeq name) cmd <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] -- check non-starred as fallback + let raw = do + guard $ isInlineCommand name || not (isBlockCommand name) + rawcommand <- getRawCommand (cmd <> star) + (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) + <|> ignore rawcommand + lookupListDefault raw names inlineCommands + tok :: PandocMonad m => LP m Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar +tok = grouped inline <|> inlineCommand' <|> singleChar + where singleChar = try $ do + Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t + if T.length t > 1 + then do + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ (Tok (lin, col + 1) toktype t2) : inp + return $ str (T.unpack t1) + else return $ str (T.unpack t) opt :: PandocMonad m => LP m Inlines opt = bracketed inline -rawopt :: PandocMonad m => LP m String +rawopt :: PandocMonad m => LP m Text rawopt = do - contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> - try (string "\\[") <|> rawopt) + symbol '[' + inner <- untokenize <$> manyTill anyTok (symbol ']') optional sp - return $ "[" ++ contents ++ "]" + return $ "[" <> inner <> "]" skipopts :: PandocMonad m => LP m () skipopts = skipMany rawopt @@ -1013,58 +1089,719 @@ skipopts = skipMany rawopt -- opts in angle brackets are used in beamer rawangle :: PandocMonad m => LP m () rawangle = try $ do - char '<' - skipMany (noneOf ">") - char '>' - return () + symbol '<' + () <$ manyTill anyTok (symbol '>') skipangles :: PandocMonad m => LP m () skipangles = skipMany rawangle -inlineText :: PandocMonad m => LP m Inlines -inlineText = str <$> many1 inlineChar +ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore raw = do + pos <- getPosition + report $ SkippedContent raw pos + return mempty + +withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) +withRaw parser = do + inp <- getInput + result <- parser + nxt <- option (Tok (0,0) Word "") (lookAhead anyTok) + let raw = takeWhile (/= nxt) inp + return (result, raw) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +unescapeURL :: String -> String +unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) +unescapeURL (x:xs) = x:unescapeURL xs +unescapeURL [] = "" + +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ + "\\end{" ++ T.unpack y ++ "}" + +mathEnv :: PandocMonad m => Text -> LP m String +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ T.unpack $ untokenize res + +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name inlineEnvironments + +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") + ] + +inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) +inlineCommands = M.fromList $ + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) + , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) + , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) + , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) + , ("texttt", ttfamily) + , ("sout", extractSpaces strikeout <$> tok) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) + , ("textbackslash", lit "\\") + , ("backslash", lit "\\") + , ("slash", lit "/") + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) + , ("ldots", lit "…") + , ("vdots", lit "\8942") + , ("dots", lit "…") + , ("mdots", lit "…") + , ("sim", lit "~") + , ("label", rawInlineOr "label" (inBrackets <$> tok)) + , ("ref", rawInlineOr "ref" (inBrackets <$> tok)) + , ("textgreek", tok) + , ("sep", lit ",") + , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty + , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . toksToString <$> braced) + , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) + , ("P", lit "¶") + , ("S", lit "§") + , ("$", lit "$") + , ("%", lit "%") + , ("&", lit "&") + , ("#", lit "#") + , ("_", lit "_") + , ("{", lit "{") + , ("}", lit "}") + -- old TeX commands + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) + , ("rm", inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) + , ("/", pure mempty) -- italic correction + , ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", try $ tok >>= accent hungarumlaut) + , ("`", option (str "`") $ try $ tok >>= accent grave) + , ("'", option (str "'") $ try $ tok >>= accent acute) + , ("^", option (str "^") $ try $ tok >>= accent circ) + , ("~", option (str "~") $ try $ tok >>= accent tilde) + , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) + , (".", option (str ".") $ try $ tok >>= accent dot) + , ("=", option (str "=") $ try $ tok >>= accent macron) + , ("c", option (str "c") $ try $ tok >>= accent cedilla) + , ("v", option (str "v") $ try $ tok >>= accent hacek) + , ("u", option (str "u") $ try $ tok >>= accent breve) + , ("i", lit "i") + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional (bracketed inline) + spaces)) + , (",", lit "\8198") + , ("@", pure mempty) + , (" ", lit "\160") + , ("ps", pure $ str "PS." <> space) + , ("TeX", lit "TeX") + , ("LaTeX", lit "LaTeX") + , ("bar", lit "|") + , ("textless", lit "<") + , ("textgreater", lit ">") + , ("thanks", note <$> grouped block) + , ("footnote", note <$> grouped block) + , ("verb", doverb) + , ("lstinline", dolstinline) + , ("Verb", doverb) + , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> + pure (link url "" (str url))) + , ("href", (unescapeURL . toksToString <$> + braced <* optional sp) >>= \url -> + tok >>= \lab -> pure (link url "" lab)) + , ("includegraphics", do options <- option [] keyvals + src <- unescapeURL . T.unpack . + removeDoubleQuotes . untokenize <$> braced + mkImage options src) + , ("enquote", enquote) + , ("cite", citation "cite" NormalCitation False) + , ("Cite", citation "Cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) + , ("footcite", inNote <$> citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", inNote <$> citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) + , ("Footcite", citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation NormalCitation) + , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> + complexNatbibCitation AuthorInText) + <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) + , ("hypertarget", braced >> tok) + -- siuntix + , ("SI", dosiunitx) + -- hyphenat + , ("bshyp", lit "\\\173") + , ("fshyp", lit "/\173") + , ("dothyp", lit ".\173") + , ("colonhyp", lit ":\173") + , ("hyp", lit "-") + , ("nohyphens", tok) + , ("textnhtt", ttfamily) + , ("nhttfamily", ttfamily) + -- LaTeX colors + , ("textcolor", coloredInline "color") + , ("colorbox", coloredInline "background-color") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") + ] + +coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline stylename = do + skipopts + color <- braced + spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + +ttfamily :: PandocMonad m => LP m Inlines +ttfamily = (code . stringify . toList) <$> tok + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' + else fallback + +getRawCommand :: PandocMonad m => Text -> LP m String +getRawCommand txt = do + (_, rawargs) <- withRaw + ((if txt == "\\write" + then () <$ satisfyTok isWordTok -- digits + else return ()) *> + skipangles *> + skipopts *> + option "" (try (optional sp *> dimenarg)) *> + many braced) + return $ T.unpack (txt <> untokenize rawargs) + +isBlockCommand :: Text -> Bool +isBlockCommand s = + s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks)) + || s `Set.member` treatAsBlock + +treatAsBlock :: Set.Set Text +treatAsBlock = Set.fromList + [ "newcommand", "renewcommand" + , "newenvironment", "renewenvironment" + , "providecommand", "provideenvironment" + -- newcommand, etc. should be parsed by macroDef, but we need this + -- here so these aren't parsed as inline commands to ignore + , "special", "pdfannot", "pdfstringdef" + , "bibliographystyle" + , "maketitle", "makeindex", "makeglossary" + , "addcontentsline", "addtocontents", "addtocounter" + -- \ignore{} is used conventionally in literate haskell for definitions + -- that are to be processed by the compiler but not printed. + , "ignore" + , "hyperdef" + , "markboth", "markright", "markleft" + , "hspace", "vspace" + , "newpage" + , "clearpage" + , "pagebreak" + ] + +isInlineCommand :: Text -> Bool +isInlineCommand s = + s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines)) + || s `Set.member` treatAsInline + +treatAsInline :: Set.Set Text +treatAsInline = Set.fromList + [ "index" + , "hspace" + , "vspace" + , "noindent" + , "newpage" + , "clearpage" + , "pagebreak" + ] + +lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where lookupList l m = msum $ map (`M.lookup` m) l + +inline :: PandocMonad m => LP m Inlines +inline = (mempty <$ comment) + <|> (space <$ whitespace) + <|> (softbreak <$ endline) + <|> word + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + <|> (symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-'))) + <|> doubleQuote + <|> singleQuote + <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) + <|> (str "”" <$ symbol '”') + <|> (str "’" <$ symbol '\'') + <|> (str "’" <$ symbol '’') + <|> (str "\160" <$ symbol '~') + <|> dollarsMath + <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) + <|> (str . (:[]) <$> primEscape) + <|> regularSymbol + <|> (do res <- symbolIn "#^'`\"[]" + pos <- getPosition + let s = T.unpack (untoken res) + report $ ParsingUnescaped s pos + return $ str s) + +inlines :: PandocMonad m => LP m Inlines +inlines = mconcat <$> many inline + +-- block elements: + +begin_ :: PandocMonad m => Text -> LP m () +begin_ t = (try $ do + controlSeq "begin" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}") + +end_ :: PandocMonad m => Text -> LP m () +end_ t = (try $ do + controlSeq "end" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}") + +preamble :: PandocMonad m => LP m Blocks +preamble = mempty <$ many preambleBlock + where preambleBlock = spaces1 + <|> void include + <|> void macroDef + <|> void blockCommand + <|> void braced + <|> (notFollowedBy (begin_ "document") >> void anyTok) -inlineChar :: PandocMonad m => LP m Char -inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" +paragraph :: PandocMonad m => LP m Blocks +paragraph = do + x <- trimInlines . mconcat <$> many1 inline + if x == mempty + then return mempty + else return $ para x + +include :: PandocMonad m => LP m Blocks +include = do + (Tok _ (CtrlSeq name) _) <- + controlSeq "include" <|> controlSeq "input" <|> + controlSeq "subfile" <|> controlSeq "usepackage" + skipMany $ bracketed inline -- skip options + fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced + let fs' = if name == "usepackage" + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs' + +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp + +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } + +authors :: PandocMonad m => LP m () +authors = try $ do + bgroup + let oneAuthor = mconcat <$> + many1 (notFollowedBy' (controlSeq "and") >> + (inline <|> mempty <$ blockCommand)) + -- skip e.g. \vspace{10pt} + auths <- sepBy oneAuthor (controlSeq "and") + egroup + addMeta "author" (map trimInlines auths) + +macroDef :: PandocMonad m => LP m Blocks +macroDef = do + guardEnabled Ext_latex_macros + mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + where commandDef = do + (name, macro') <- newcommand + updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) } + environmentDef = do + (name, macro1, macro2) <- newenvironment + updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" + optional $ symbol '*' + Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- braced + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro numargs optarg contents) + +newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + optional $ symbol '*' + symbol '{' + spaces + Tok _ Word name <- satisfyTok isWordTok + spaces + symbol '}' + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + startcontents <- braced + spaces + endcontents <- braced + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro numargs optarg startcontents, + Macro 0 Nothing endcontents) + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + manyTill anyTok (symbol ']') + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead (T.unpack ds) of + Just i -> return i + _ -> return 0 + +setCaption :: PandocMonad m => LP m Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ sCaption = Just ils' } + return mempty + +looseItem :: PandocMonad m => LP m Blocks +looseItem = do + inListItem <- sInListItem <$> getState + guard $ not inListItem + skipopts + return mempty + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing } + +section :: PandocMonad m => Attr -> Int -> LP m Blocks +section (ident, classes, kvs) lvl = do + skipopts + contents <- grouped inline + lab <- option ident $ + try (spaces >> controlSeq "label" + >> spaces >> toksToString <$> braced) + attr' <- registerHeader (lab, classes, kvs) contents + return $ headerWith attr' lvl contents + +blockCommand :: PandocMonad m => LP m Blocks +blockCommand = try $ do + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] + let raw = do + guard $ isBlockCommand name || not (isInlineCommand name) + rawBlock "latex" <$> getRawCommand (txt <> star) + lookupListDefault raw names blockCommands + +closing :: PandocMonad m => LP m Blocks +closing = do + contents <- tok + st <- getState + let extractInlines (MetaBlocks [Plain ys]) = ys + extractInlines (MetaBlocks [Para ys ]) = ys + extractInlines _ = [] + let sigs = case lookupMeta "author" (sMeta st) of + Just (MetaList xs) -> + para $ trimInlines $ fromList $ + intercalate [LineBreak] $ map extractInlines xs + _ -> mempty + return $ para (trimInlines contents) <> sigs + +blockCommands :: PandocMonad m => M.Map Text (LP m Blocks) +blockCommands = M.fromList $ + [ ("par", mempty <$ skipopts) + , ("parbox", braced >> grouped blocks) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) + , ("author", mempty <$ (skipopts *> authors)) + -- -- in letter class, temp. store address & sig as title, author + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) + , ("signature", mempty <$ (skipopts *> authors)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) + -- Koma-script metadata commands + , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) + -- sectioning + , ("part", section nullAttr (-1)) + , ("part*", section nullAttr (-1)) + , ("chapter", section nullAttr 0) + , ("chapter*", section ("",["unnumbered"],[]) 0) + , ("section", section nullAttr 1) + , ("section*", section ("",["unnumbered"],[]) 1) + , ("subsection", section nullAttr 2) + , ("subsection*", section ("",["unnumbered"],[]) 2) + , ("subsubsection", section nullAttr 3) + , ("subsubsection*", section ("",["unnumbered"],[]) 3) + , ("paragraph", section nullAttr 4) + , ("paragraph*", section ("",["unnumbered"],[]) 4) + , ("subparagraph", section nullAttr 5) + , ("subparagraph*", section ("",["unnumbered"],[]) 5) + -- beamer slides + , ("frametitle", section nullAttr 3) + , ("framesubtitle", section nullAttr 4) + -- letters + , ("opening", (para . trimInlines) <$> (skipopts *> tok)) + , ("closing", skipopts *> closing) + -- + , ("hrule", pure horizontalRule) + , ("strut", pure mempty) + , ("rule", skipopts *> tok *> tok *> pure horizontalRule) + , ("item", looseItem) + , ("documentclass", skipopts *> braced *> preamble) + , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) + , ("caption", skipopts *> setCaption) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + -- includes + , ("lstinputlisting", inputListing) + , ("graphicspath", graphicsPath) + -- hyperlink + , ("hypertarget", try $ braced >> grouped block) + -- LaTeX colors + , ("textcolor", coloredBlock "color") + , ("colorbox", coloredBlock "background-color") + ] + + +environments :: PandocMonad m => M.Map Text (LP m Blocks) +environments = M.fromList + [ ("document", env "document" blocks) + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) + , ("minipage", env "minipage" $ + skipopts *> spaces *> optional braced *> spaces *> blocks) + , ("figure", env "figure" $ skipopts *> figure) + , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) + , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> simpTable "longtable" False >>= addTableCaption) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", orderedList') + , ("alltt", alltt <$> env "alltt" blocks) + , ("code", guardEnabled Ext_literate_haskell *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") + , ("verbatim", codeBlock <$> verbEnv "verbatim") + , ("Verbatim", fancyverbEnv "Verbatim") + , ("BVerbatim", fancyverbEnv "BVerbatim") + , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals + codeBlockWith attr <$> verbEnv "lstlisting") + , ("minted", minted) + , ("obeylines", obeylines) + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") + ] environment :: PandocMonad m => LP m Blocks environment = do controlSeq "begin" - name <- braced + name <- untokenize <$> braced M.findWithDefault mzero name environments <|> rawEnv name -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name inlineEnvironments +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name -rawEnv :: PandocMonad m => String -> LP m Blocks +rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts rawOptions <- mconcat <$> many rawopt - let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions + let beginCommand = "\\begin{" <> name <> "}" <> rawOptions pos1 <- getPosition (bs, raw) <- withRaw $ env name blocks - raw' <- applyMacros' $ beginCommand ++ raw - if raw' /= beginCommand ++ raw - then parseFromString' blocks raw' - else if parseRaw - then return $ rawBlock "latex" $ beginCommand ++ raw' - else do - unless parseRaw $ do - report $ SkippedContent beginCommand pos1 - pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 - return bs - -rawVerbEnv :: PandocMonad m => String -> LP m Blocks + if parseRaw + then return $ rawBlock "latex" + $ T.unpack $ beginCommand <> untokenize raw + else do + unless parseRaw $ do + report $ SkippedContent (T.unpack beginCommand) pos1 + pos2 <- getPosition + report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + return bs + +rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{tikzpicture}" ++ raw + let raw' = "\\begin{tikzpicture}" ++ toksToString raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1073,36 +1810,118 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty ----- +verbEnv :: PandocMonad m => Text -> LP m String +verbEnv name = withVerbatimMode $ do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ toksToString res -maybeAddExtension :: String -> FilePath -> FilePath -maybeAddExtension ext fp = - if null (takeExtension fp) - then addExtension fp ext - else fp +fancyverbEnv :: PandocMonad m => Text -> LP m Blocks +fancyverbEnv name = do + options <- option [] keyvals + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv name -include :: PandocMonad m => LP m Blocks -include = do - fs' <- try $ do - char '\\' - name <- try (string "include") - <|> try (string "input") - <|> try (string "subfile") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> manyTill anyChar (char ']') - fs <- (map trim . splitBy (==',')) <$> braced - return $ if name == "usepackage" - then map (maybeAddExtension ".sty") fs - else map (maybeAddExtension ".tex") fs - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mconcat <$> mapM (insertIncludedFile blocks dirs) fs' +obeylines :: PandocMonad m => LP m Blocks +obeylines = do + para . fromList . removeLeadingTrailingBreaks . + walk softBreakToHard . toList <$> env "obeylines" inlines + where softBreakToHard SoftBreak = LineBreak + softBreakToHard x = x + removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak . + reverse . dropWhile isLineBreak + isLineBreak LineBreak = True + isLineBreak _ = False + +minted :: PandocMonad m => LP m Blocks +minted = do + options <- option [] keyvals + lang <- toksToString <$> braced + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ lang | not (null lang) ] ++ + [ "numberLines" | + lookup "linenos" options == Just "true" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv "minted" + +letterContents :: PandocMonad m => LP m Blocks +letterContents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case lookupMeta "address" (sMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty + return $ addr <> bs -- sig added by \closing + +figure :: PandocMonad m => LP m Blocks +figure = try $ do + resetCaption + blocks >>= addImageCaption + +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks +addImageCaption = walkM go + where go (Image attr alt (src,tit)) + | not ("fig:" `isPrefixOf` tit) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) + Nothing -> Image attr alt (src,tit) + go x = return x + +coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock stylename = do + skipopts + color <- braced + let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + inlineContents <|> constructor <$> blockContents + where inlineContents = do + ils <- grouped inline + rest <- inlines + return (para (ils <> rest)) + blockContents = grouped block + +graphicsPath :: PandocMonad m => LP m Blocks +graphicsPath = do + ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + getResourcePath >>= setResourcePath . (++ ps) + return mempty + +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + +alltt :: Blocks -> Blocks +alltt = walk strToCode + where strToCode (Str s) = Code nullAttr s + strToCode Space = RawInline (Format "latex") "\\ " + strToCode SoftBreak = LineBreak + strToCode x = x + +parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions options = + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + ++ maybeToList (lookup "language" options + >>= fromListingsLanguage) + in (fromMaybe "" (lookup "label" options), classes, kvs) inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') <$> braced + f <- filter (/='"') . toksToString <$> braced dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mbCode <- readFileFromDirs dirs f codeLines <- case mbCode of @@ -1121,169 +1940,10 @@ inputListing = do drop (firstline - 1) codeLines return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents -parseListingsOptions :: [(String, String)] -> Attr -parseListingsOptions options = - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - ++ maybeToList (lookup "language" options - >>= fromListingsLanguage) - in (fromMaybe "" (lookup "label" options), classes, kvs) - ----- - -keyval :: PandocMonad m => LP m (String, String) -keyval = try $ do - key <- many1 alphaNum - val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\")) - skipMany spaceChar - optional (char ',') - skipMany spaceChar - return (key, val) - - -keyvals :: PandocMonad m => LP m [(String, String)] -keyvals = try $ char '[' *> manyTill keyval (char ']') - -alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString' blocks - (substitute " " "\\ " $ substitute "%" "\\%" $ - intercalate "\\\\\n" $ lines t) - where strToCode (Str s) = Code nullAttr s - strToCode x = x - -rawLaTeXBlock :: PandocMonad m => LP m String -rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) - -rawLaTeXInline :: PandocMonad m => LP m Inline -rawLaTeXInline = do - raw <- (snd <$> withRaw inlineCommand) - <|> (snd <$> withRaw inlineEnvironment) - <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw - -addImageCaption :: PandocMonad m => Blocks -> LP m Blocks -addImageCaption = walkM go - where go (Image attr alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) - Nothing -> Image attr alt (src,tit) - go x = return x - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table c als ws hs rs) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Table (toList ils) als ws hs rs - Nothing -> Table c als ws hs rs - go x = return x - -environments :: PandocMonad m => M.Map String (LP m Blocks) -environments = M.fromList - [ ("document", env "document" blocks <* skipMany anyChar) - , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) - , ("letter", env "letter" letterContents) - , ("minipage", env "minipage" $ - skipopts *> spaces' *> optional braced *> spaces' *> blocks) - , ("figure", env "figure" $ skipopts *> figure) - , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) - , ("center", env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) - , ("quote", blockQuote <$> env "quote" blocks) - , ("quotation", blockQuote <$> env "quotation" blocks) - , ("verse", blockQuote <$> env "verse" blocks) - , ("itemize", bulletList <$> listenv "itemize" (many item)) - , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", orderedList') - , ("alltt", alltt =<< verbEnv "alltt") - , ("code", guardEnabled Ext_literate_haskell *> - (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - verbEnv "code")) - , ("comment", mempty <$ verbEnv "comment") - , ("verbatim", codeBlock <$> verbEnv "verbatim") - , ("Verbatim", fancyverbEnv "Verbatim") - , ("BVerbatim", fancyverbEnv "BVerbatim") - , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals - codeBlockWith attr <$> verbEnv "lstlisting") - , ("minted", do options <- option [] keyvals - lang <- grouped (many1 $ satisfy (/='}')) - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ - [ "numberLines" | - lookup "linenos" options == Just "true" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv "minted") - , ("obeylines", parseFromString - (para . trimInlines . mconcat <$> many inline) =<< - intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnvWith para Nothing "displaymath") - , ("equation", mathEnvWith para Nothing "equation") - , ("equation*", mathEnvWith para Nothing "equation*") - , ("gather", mathEnvWith para (Just "gathered") "gather") - , ("gather*", mathEnvWith para (Just "gathered") "gather*") - , ("multline", mathEnvWith para (Just "gathered") "multline") - , ("multline*", mathEnvWith para (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") - , ("align", mathEnvWith para (Just "aligned") "align") - , ("align*", mathEnvWith para (Just "aligned") "align*") - , ("alignat", mathEnvWith para (Just "aligned") "alignat") - , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") - , ("tikzpicture", rawVerbEnv "tikzpicture") - ] - -figure :: PandocMonad m => LP m Blocks -figure = try $ do - resetCaption - blocks >>= addImageCaption - -letterContents :: PandocMonad m => LP m Blocks -letterContents = do - bs <- blocks - st <- getState - -- add signature (author) and address (title) - let addr = case lookupMeta "address" (stateMeta st) of - Just (MetaBlocks [Plain xs]) -> - para $ trimInlines $ fromList xs - _ -> mempty - return $ addr <> bs -- sig added by \closing - -closing :: PandocMonad m => LP m Blocks -closing = do - contents <- tok - st <- getState - let extractInlines (MetaBlocks [Plain ys]) = ys - extractInlines (MetaBlocks [Para ys ]) = ys - extractInlines _ = [] - let sigs = case lookupMeta "author" (stateMeta st) of - Just (MetaList xs) -> - para $ trimInlines $ fromList $ - intercalate [LineBreak] $ map extractInlines xs - _ -> mempty - return $ para (trimInlines contents) <> sigs +-- lists item :: PandocMonad m => LP m Blocks -item = blocks *> controlSeq "item" *> skipopts *> blocks - -looseItem :: PandocMonad m => LP m Blocks -looseItem = do - ctx <- stateParserContext `fmap` getState - if ctx == ListItemState - then mzero - else return mempty +item = void blocks *> controlSeq "item" *> skipopts *> blocks descItem :: PandocMonad m => LP m (Inlines, [Blocks]) descItem = do @@ -1294,302 +1954,210 @@ descItem = do bs <- blocks return (ils, [bs]) -env :: PandocMonad m => String -> LP m a -> LP m a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name)) - <?> ("\\end{" ++ name ++ "}")) - -listenv :: PandocMonad m => String -> LP m a -> LP m a +listenv :: PandocMonad m => Text -> LP m a -> LP m a listenv name p = try $ do - oldCtx <- stateParserContext `fmap` getState - updateState $ \st -> st{ stateParserContext = ListItemState } + oldInListItem <- sInListItem `fmap` getState + updateState $ \st -> st{ sInListItem = True } res <- env name p - updateState $ \st -> st{ stateParserContext = oldCtx } + updateState $ \st -> st{ sInListItem = oldInListItem } return res -mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" - -mathEnv :: PandocMonad m => String -> LP m String -mathEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = skipMany comment *> - (many1 (noneOf "\\%") <|> try (string "\\%") - <|> try (string "\\\\") <|> count 1 anyChar) - res <- concat <$> manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -verbEnv :: PandocMonad m => String -> LP m String -verbEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = anyChar - res <- manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -fancyverbEnv :: PandocMonad m => String -> LP m Blocks -fancyverbEnv name = do - options <- option [] keyvals - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv name - orderedList' :: PandocMonad m => LP m Blocks orderedList' = try $ do - optional sp - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ char '[' *> anyOrderedListMarker <* char ']' spaces - optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced + let markerSpec = do + symbol '[' + ts <- toksToString <$> manyTill anyTok (symbol ']') + case runParser anyOrderedListMarker def "option" ts of + Right r -> return r + Left _ -> do + pos <- getPosition + report $ SkippedContent ("[" ++ ts ++ "]") pos + return (1, DefaultStyle, DefaultDelim) + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces - start <- option 1 $ try $ do controlSeq "setcounter" - grouped (string "enum" *> many1 (oneOf "iv")) + optional $ try $ controlSeq "setlength" + *> grouped (count 1 $ controlSeq "itemindent") + *> braced + spaces + start <- option 1 $ try $ do pos <- getPosition + controlSeq "setcounter" + ctr <- toksToString <$> braced + guard $ "enum" `isPrefixOf` ctr + guard $ all (`elem` ['i','v']) (drop 4 ctr) optional sp - num <- grouped (many1 digit) - spaces - return (read num + 1 :: Int) + num <- toksToString <$> braced + case safeRead num of + Just i -> return (i + 1 :: Int) + Nothing -> do + report $ SkippedContent + ("\\setcounter{" ++ ctr ++ + "}{" ++ num ++ "}") pos + return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs -paragraph :: PandocMonad m => LP m Blocks -paragraph = do - x <- trimInlines . mconcat <$> many1 inline - if x == mempty - then return mempty - else return $ para x - -preamble :: PandocMonad m => LP m Blocks -preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = void comment - <|> void sp - <|> void blanklines - <|> void include - <|> void macro - <|> void blockCommand - <|> void anyControlSeq - <|> void braced - <|> void anyChar - -------- - --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - keys <- try $ bgroup *> (manyTill citationLabel egroup) - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m String -citationLabel = optional sp *> - (many1 (satisfy isBibtexKeyChar) - <* optional sp - <* optional (char ',') - <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) +-- tables -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then many1 simpleCiteArgs - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional $ bracketed inline + return () -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - let ils = (toList . trimInlines . mconcat) <$> - many (notFollowedBy (oneOf "\\};") >> inline) - let parseOne = try $ do - skipSpaces - pref <- ils - cit' <- inline -- expect a citation - let citlist = toList cit' - cits' <- case citlist of - [Cite cs _] -> return cs - _ -> mzero - suff <- ils - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff cits' - (c:cits, raw) <- withRaw $ grouped parseOne - return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ raw) +amp :: PandocMonad m => LP m Tok +amp = symbol '&' --- tables +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> do + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + _ -> return () -parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] +parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] parseAligns = try $ do - bgroup - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) - maybeBar - let cAlign = AlignCenter <$ char 'c' - let lAlign = AlignLeft <$ char 'l' - let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ char 'p' - -- algins from tabularx - let xAlign = AlignLeft <$ char 'X' - let mAlign = AlignLeft <$ char 'm' - let bAlign = AlignLeft <$ char 'b' - let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign - let alignPrefix = char '>' >> braced - let alignSuffix = char '<' >> braced + let maybeBar = skipMany $ + sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced let colWidth = try $ do - char '{' - ds <- many1 (oneOf "0123456789.") + symbol '{' + ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") spaces - string "\\linewidth" - char '}' + symbol '}' case safeRead ds of Just w -> return w Nothing -> return 0.0 - let alignSpec = do + let alignSpec = try $ do spaces - pref <- option "" alignPrefix + pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- braced + width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) spaces - suff <- option "" alignSuffix + suff <- option [] alignSuffix return (al, width, (pref, suff)) - aligns' <- sepEndBy alignSpec maybeBar + bgroup + spaces + maybeBar + aligns' <- many (alignSpec <* maybeBar) spaces egroup spaces - return $ aligns' - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces' - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces' - optional $ bracketed (many1 (satisfy (/=']'))) - return () - -lbreak :: PandocMonad m => LP m () -lbreak = () <$ try (spaces' *> - (controlSeq "\\" <|> controlSeq "tabularnewline") <* - spaces') - -amp :: PandocMonad m => LP m () -amp = () <$ try (spaces' *> char '&' <* spaces') + return aligns' parseTableRow :: PandocMonad m - => String -- ^ table environment name - -> [(String, String)] -- ^ pref/suffixes + => Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes -> LP m [Blocks] -parseTableRow envname prefsufs = try $ do +parseTableRow envname prefsufs = do + notFollowedBy (spaces *> end_ envname) let cols = length prefsufs - let tableCellRaw = concat <$> many - (do notFollowedBy amp - notFollowedBy lbreak - notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) - many1 (noneOf "&%\n\r\\") - <|> try (string "\\&") - <|> count 1 anyChar) - let plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - rawcells <- sepBy1 tableCellRaw amp - guard $ length rawcells == cols - let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs - let tableCell = plainify <$> blocks - cells' <- mapM (parseFromString' tableCell) rawcells' - let numcells = length cells' + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- many (notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + >> anyTok) + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref + ++ contents ++ + map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff + rawcells <- sequence (map celltoks prefsufs) + oldInput <- getInput + cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells + setInput oldInput + spaces + let numcells = length cells guard $ numcells <= cols && numcells >= 1 - guard $ cells' /= [mempty] + guard $ cells /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: - let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces' - return cells'' + return $ cells ++ replicate (cols - numcells) mempty -spaces' :: PandocMonad m => LP m () -spaces' = spaces *> skipMany (comment *> spaces) +parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell = do + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + updateState $ \st -> st{ sInTableCell = True } + cells <- plainify <$> blocks + updateState $ \st -> st{ sInTableCell = False } + return cells -simpTable :: PandocMonad m => String -> Bool -> LP m Blocks +simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces' >> tok) + when hasWidthParameter $ () <$ (spaces >> tok) skipopts colspecs <- parseAligns let (aligns, widths, prefsufs) = unzip3 colspecs let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces skipMany hline - spaces' + spaces header' <- option [] $ try (parseTableRow envname prefsufs <* lbreak <* many1 hline) - spaces' + spaces rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) - spaces' + spaces optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces let header'' = if null header' then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns widths) header'' rows -removeDoubleQuotes :: String -> String -removeDoubleQuotes ('"':xs) = - case reverse xs of - '"':ys -> reverse ys - _ -> '"':xs -removeDoubleQuotes xs = xs +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs + go x = return x + + +block :: PandocMonad m => LP m Blocks +block = (mempty <$ spaces1) + <|> environment + <|> include + <|> macroDef + <|> blockCommand + <|> paragraph + <|> grouped block + +blocks :: PandocMonad m => LP m Blocks +blocks = mconcat <$> many block + diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs new file mode 100644 index 000000000..6f84ae1f1 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -0,0 +1,48 @@ +{- +Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Types + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Types for LaTeX tokens and macros. +-} +module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) + , TokType(..) + , Macro(..) + , Line + , Column ) +where +import Data.Text (Text) +import Text.Parsec.Pos (Line, Column) + +data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | + Esc1 | Esc2 | Arg Int + deriving (Eq, Ord, Show) + +data Tok = Tok (Line, Column) TokType Text + deriving (Eq, Ord, Show) + +data Macro = Macro Int (Maybe [Tok]) [Tok] + deriving Show + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e1c481311..ab6a32b78 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -52,15 +52,17 @@ import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..), report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) -import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros, + macro) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) @@ -74,7 +76,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -291,18 +293,22 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) - where - toMeta p = do - p' <- p - return $ - case B.toList p' of - [Plain xs] -> MetaInlines xs - [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. yamlToMeta :: PandocMonad m => Yaml.Value -> MarkdownParser m (F MetaValue) @@ -368,13 +374,14 @@ parseMarkdown = do -- lookup to get sourcepos case M.lookup n (stateNotes' st) of Just (pos, _) -> report (NoteDefinedButNotUsed n pos) - Nothing -> error "The impossible happened.") notesDefined + Nothing -> throwError $ + PandocShouldNeverHappenError "note not found") + notesDefined let doc = runF (do Pandoc _ bs <- B.doc <$> blocks meta <- stateMeta' st return $ Pandoc meta bs) st reportLogMessages - (do guardEnabled Ext_east_asian_line_breaks - return $ eastAsianLineBreakFilter doc) <|> return doc + return doc referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do @@ -488,7 +495,6 @@ parseBlocks = mconcat <$> manyTill block eof block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock @@ -514,8 +520,7 @@ block = do , para , plain ] <?> "block" - report $ ParsingTrace - (take 60 $ show $ B.toList $ runF res defaultParserState) pos + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- @@ -680,19 +685,36 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -1013,7 +1035,8 @@ para = try $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `extensionEnabled` exts -> + | not (null alt) && + Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) @@ -1083,10 +1106,11 @@ latexMacro = try $ do rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> + result <- (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) + <|> (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + spaces return $ return result @@ -1515,17 +1539,24 @@ code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) @@ -1849,9 +1880,8 @@ rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead (char '\\') notFollowedBy' rawConTeXtEnvironment - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index a3ff60c14..a7f073d50 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -52,13 +52,14 @@ import qualified Data.Set as Set import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim) +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, + crFilter) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -77,7 +78,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack s ++ "\n") + (unpack (crFilter s) ++ "\n") case parsed of Right result -> return result Left e -> throwError e @@ -205,7 +206,6 @@ parseMediaWiki = do block :: PandocMonad m => MWParser m Blocks block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> table <|> header @@ -218,7 +218,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs new file mode 100644 index 000000000..1ae73c148 --- /dev/null +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -0,0 +1,607 @@ +{- + Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Muse + Copyright : Copyright (C) 2017 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : alpha + Portability : portable + +Conversion of Muse text to 'Pandoc' document. +-} +{- +TODO: +- {{{ }}} syntax for <example> +- Page breaks (five "*") +- Headings with anchors (make it round trip with Muse writer) +- <verse> and ">" +- Definition lists +- Org tables +- table.el tables +- Images with attributes (floating and width) +- Anchors +- Citations and <biblio> +- <play> environment +- <verbatim> tag +-} +module Text.Pandoc.Readers.Muse (readMuse) where + +import Control.Monad +import Control.Monad.Except (throwError) +import qualified Data.Map as M +import Data.Text (Text, unpack) +import Data.List (stripPrefix) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad(..)) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Parsing hiding (nested) +import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.XML (fromEntities) +import System.FilePath (takeExtension) + +-- | Read Muse from an input string and return a Pandoc document. +readMuse :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readMuse opts s = do + res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s)) + case res of + Left e -> throwError e + Right d -> return d + +type MuseParser = ParserT String ParserState + +-- +-- main parser +-- + +parseMuse :: PandocMonad m => MuseParser m Pandoc +parseMuse = do + many directive + blocks <- parseBlocks + st <- getState + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- stateMeta' st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +parseBlocks :: PandocMonad m => MuseParser m (F Blocks) +parseBlocks = do + res <- mconcat <$> many block + spaces + eof + return res + +-- +-- utility functions +-- + +nested :: PandocMonad m => MuseParser m a -> MuseParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlElement tag = try $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = void $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: PandocMonad m + => String -> MuseParser m a -> MuseParser m (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a] +parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p) + +-- +-- directive parsers +-- + +parseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseDirective = do + char '#' + key <- many letter + space + spaces + raw <- many $ noneOf "\n" + newline + value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw + return (key, value) + +directive :: PandocMonad m => MuseParser m () +directive = do + (key, value) <- parseDirective + updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st } + +-- +-- block parsers +-- + +block :: PandocMonad m => MuseParser m (F Blocks) +block = do + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + trace (take 60 $ show $ B.toList $ runF res defaultParserState) + return res + +blockElements :: PandocMonad m => MuseParser m (F Blocks) +blockElements = choice [ comment + , separator + , header + , exampleTag + , literal + , centerTag + , rightTag + , quoteTag + , bulletList + , orderedList + , table + , commentTag + , indentedBlock + , noteBlock + ] + +comment :: PandocMonad m => MuseParser m (F Blocks) +comment = try $ do + char ';' + space + many $ noneOf "\n" + void newline <|> eof + return mempty + +separator :: PandocMonad m => MuseParser m (F Blocks) +separator = try $ do + string "----" + many $ char '-' + many spaceChar + void newline <|> eof + return $ return B.horizontalRule + +header :: PandocMonad m => MuseParser m (F Blocks) +header = try $ do + st <- stateParserContext <$> getState + q <- stateQuoteContext <$> getState + getPosition >>= \pos -> guard (st == NullState && q == NoQuote && sourceColumn pos == 1) + level <- liftM length $ many1 $ char '*' + guard $ level <= 5 + skipSpaces + content <- trimInlinesF . mconcat <$> manyTill inline newline + attr <- registerHeader ("", [], []) (runF content defaultParserState) + return $ B.headerWith attr level <$> content + +exampleTag :: PandocMonad m => MuseParser m (F Blocks) +exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example" + +literal :: PandocMonad m => MuseParser m (F Blocks) +literal = liftM (return . rawBlock) $ htmlElement "literal" + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +blockTag :: PandocMonad m + => (Blocks -> Blocks) + -> String + -> MuseParser m (F Blocks) +blockTag f s = do + res <- parseHtmlContent s block + return $ f <$> mconcat res + +-- <center> tag is ignored +centerTag :: PandocMonad m => MuseParser m (F Blocks) +centerTag = blockTag id "center" + +-- <right> tag is ignored +rightTag :: PandocMonad m => MuseParser m (F Blocks) +rightTag = blockTag id "right" + +quoteTag :: PandocMonad m => MuseParser m (F Blocks) +quoteTag = blockTag B.blockQuote "quote" + +commentTag :: PandocMonad m => MuseParser m (F Blocks) +commentTag = parseHtmlContent "comment" block >> return mempty + +-- Indented block is either center, right or quote +indentedLine :: PandocMonad m => MuseParser m (Int, String) +indentedLine = try $ do + indent <- length <$> many1 spaceChar + line <- anyLine + return (indent, line) + +rawIndentedBlock :: PandocMonad m => MuseParser m (Int, String) +rawIndentedBlock = try $ do + lns <- many1 indentedLine + let indent = minimum $ map fst lns + return (indent, unlines $ map snd lns) + +indentedBlock :: PandocMonad m => MuseParser m (F Blocks) +indentedBlock = try $ do + (indent, raw) <- rawIndentedBlock + contents <- withQuoteContext InDoubleQuote $ parseFromString parseBlocks raw + return $ (if indent >= 2 && indent < 6 then B.blockQuote else id) <$> contents + +para :: PandocMonad m => MuseParser m (F Blocks) +para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> void blockElements + +noteMarker :: PandocMonad m => MuseParser m String +noteMarker = try $ do + char '[' + many1Till digit $ char ']' + +noteBlock :: PandocMonad m => MuseParser m (F Blocks) +noteBlock = try $ do + pos <- getPosition + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillNote + oldnotes <- stateNotes' <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes } + return mempty + where + blocksTillNote = + many1Till block (eof <|> () <$ lookAhead noteMarker) + +-- +-- lists +-- + +listLine :: PandocMonad m => Int -> MuseParser m String +listLine markerLength = try $ do + notFollowedBy blankline + indentWith markerLength + anyLineNewline + +withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a +withListContext p = do + state <- getState + let oldContext = stateParserContext state + setState $ state { stateParserContext = ListItemState } + parsed <- p + updateState (\st -> st {stateParserContext = oldContext}) + return parsed + +listContinuation :: PandocMonad m => Int -> MuseParser m String +listContinuation markerLength = try $ do + blanks <- many1 blankline + result <- many1 $ listLine markerLength + return $ blanks ++ concat result + +listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int +listStart marker = try $ do + preWhitespace <- length <$> many spaceChar + st <- stateParserContext <$> getState + getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1) + markerLength <- marker + postWhitespace <- length <$> many1 spaceChar + return $ preWhitespace + markerLength + postWhitespace + +listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks) +listItem start = try $ do + markerLength <- start + firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + restLines <- many $ listLine markerLength + let first = firstLine ++ blank ++ concat restLines + rest <- many $ listContinuation markerLength + parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n" + +bulletListItems :: PandocMonad m => MuseParser m (F [Blocks]) +bulletListItems = sequence <$> many1 (listItem bulletListStart) + +bulletListStart :: PandocMonad m => MuseParser m Int +bulletListStart = listStart (char '-' >> return 1) + +bulletList :: PandocMonad m => MuseParser m (F Blocks) +bulletList = do + listItems <- bulletListItems + return $ B.bulletList <$> listItems + +orderedListStart :: PandocMonad m + => ListNumberStyle + -> ListNumberDelim + -> MuseParser m Int +orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim)) + +orderedList :: PandocMonad m => MuseParser m (F Blocks) +orderedList = try $ do + p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar) + guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] + guard $ delim == Period + items <- sequence <$> many1 (listItem $ orderedListStart style delim) + return $ B.orderedListWith p <$> items + +-- +-- tables +-- + +data MuseTable = MuseTable + { museTableCaption :: Inlines + , museTableHeaders :: [[Blocks]] + , museTableRows :: [[Blocks]] + , museTableFooters :: [[Blocks]] + } + +data MuseTableElement = MuseHeaderRow (F [Blocks]) + | MuseBodyRow (F [Blocks]) + | MuseFooterRow (F [Blocks]) + | MuseCaption (F Inlines) + +museToPandocTable :: MuseTable -> Blocks +museToPandocTable (MuseTable caption headers body footers) = + B.table caption attrs headRow rows + where ncol = maximum (0 : map length (headers ++ body ++ footers)) + attrs = replicate ncol (AlignDefault, 0.0) + headRow = if null headers then [] else head headers + rows = (if null headers then [] else tail headers) ++ body ++ footers + +museAppendElement :: MuseTable + -> MuseTableElement + -> F MuseTable +museAppendElement tbl element = + case element of + MuseHeaderRow row -> do + row' <- row + return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } + MuseBodyRow row -> do + row' <- row + return tbl{ museTableRows = museTableRows tbl ++ [row'] } + MuseFooterRow row-> do + row' <- row + return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } + MuseCaption inlines -> do + inlines' <- inlines + return tbl{ museTableCaption = inlines' } + +tableCell :: PandocMonad m => MuseParser m (F Blocks) +tableCell = try $ do + content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) + return $ B.plain <$> content + where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof + +tableElements :: PandocMonad m => MuseParser m [MuseTableElement] +tableElements = tableParseElement `sepEndBy1` (void newline <|> eof) + +elementsToTable :: [MuseTableElement] -> F MuseTable +elementsToTable = foldM museAppendElement emptyTable + where emptyTable = MuseTable mempty mempty mempty mempty + +table :: PandocMonad m => MuseParser m (F Blocks) +table = try $ do + rows <- tableElements + let tbl = elementsToTable rows + let pandocTbl = museToPandocTable <$> tbl :: F Blocks + return pandocTbl + +tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement = tableParseHeader + <|> tableParseBody + <|> tableParseFooter + <|> tableParseCaption + +tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow n = try $ do + fields <- tableCell `sepBy2` fieldSep + return $ sequence fields + where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) + fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) + +tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement +tableParseHeader = MuseHeaderRow <$> tableParseRow 2 + +tableParseBody :: PandocMonad m => MuseParser m MuseTableElement +tableParseBody = MuseBodyRow <$> tableParseRow 1 + +tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement +tableParseFooter = MuseFooterRow <$> tableParseRow 3 + +tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +tableParseCaption = try $ do + many spaceChar + string "|+" + contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|") + string "+|" + return $ MuseCaption contents + +-- +-- inline parsers +-- + +inline :: PandocMonad m => MuseParser m (F Inlines) +inline = choice [ br + , footnote + , strong + , strongTag + , emph + , emphTag + , superscriptTag + , subscriptTag + , strikeoutTag + , link + , code + , codeTag + , whitespace + , str + , symbol + ] <?> "inline" + +footnote :: PandocMonad m => MuseParser m (F Inlines) +footnote = try $ do + ref <- noteMarker + return $ do + notes <- asksF stateNotes' + case M.lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just (_pos, contents) -> do + st <- askF + let contents' = runF contents st { stateNotes' = M.empty } + return $ B.note contents' + +whitespace :: PandocMonad m => MuseParser m (F Inlines) +whitespace = liftM return (lb <|> regsp) + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: PandocMonad m => MuseParser m (F Inlines) +br = try $ do + string "<br>" + return $ return B.linebreak + +linebreak :: PandocMonad m => MuseParser m (F Inlines) +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = do + eof + return $ return mempty + innerNewline = return $ return B.space + +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) +emphasisBetween c = try $ enclosedInlines c c + +enclosedInlines :: (PandocMonad m, Show a, Show b) + => MuseParser m a + -> MuseParser m b + -> MuseParser m (F Inlines) +enclosedInlines start end = try $ + trimInlinesF . mconcat <$> enclosed start end inline + +verbatimBetween :: PandocMonad m + => Char + -> MuseParser m String +verbatimBetween c = try $ do + char c + many1Till anyChar $ char c + +inlineTag :: PandocMonad m + => (Inlines -> Inlines) + -> String + -> MuseParser m (F Inlines) +inlineTag f s = do + res <- parseHtmlContent s inline + return $ f <$> mconcat res + +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = inlineTag B.strong "strong" + +strong :: PandocMonad m => MuseParser m (F Inlines) +strong = fmap B.strong <$> emphasisBetween (string "**") + +emph :: PandocMonad m => MuseParser m (F Inlines) +emph = fmap B.emph <$> emphasisBetween (char '*') + +emphTag :: PandocMonad m => MuseParser m (F Inlines) +emphTag = inlineTag B.emph "em" + +superscriptTag :: PandocMonad m => MuseParser m (F Inlines) +superscriptTag = inlineTag B.superscript "sup" + +subscriptTag :: PandocMonad m => MuseParser m (F Inlines) +subscriptTag = inlineTag B.subscript "sub" + +strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) +strikeoutTag = inlineTag B.strikeout "del" + +code :: PandocMonad m => MuseParser m (F Inlines) +code = try $ do + pos <- getPosition + sp <- if sourceColumn pos == 1 + then pure mempty + else skipMany1 spaceChar >> pure B.space + cd <- verbatimBetween '=' + notFollowedBy nonspaceChar + return $ return (sp B.<> B.code cd) + +codeTag :: PandocMonad m => MuseParser m (F Inlines) +codeTag = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ return $ B.codeWith attrs $ fromEntities content + +str :: PandocMonad m => MuseParser m (F Inlines) +str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference) + +symbol :: PandocMonad m => MuseParser m (F Inlines) +symbol = liftM (return . B.str) $ count 1 nonspaceChar + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ case stripPrefix "URL:" url of + Nothing -> if isImageUrl url + then B.image url title <$> fromMaybe (return mempty) content + else B.link url title <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension + +linkContent :: PandocMonad m => MuseParser m (F Inlines) +linkContent = do + char '[' + res <- many1Till anyChar $ char ']' + parseFromString (mconcat <$> many1 inline) res + +linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText = do + string "[[" + url <- many1Till anyChar $ char ']' + content <- optionMaybe linkContent + char ']' + return (url, "", content) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 591d7590e..c25ace800 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,6 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (toUpper) import Data.Text (Text, unpack, pack) import Data.Default @@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light @@ -32,7 +33,8 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML _ inp = do (bs, st') <- flip runStateT def - (mapM parseBlock $ normalizeTree $ parseXML (unpack inp)) + (mapM parseBlock $ normalizeTree $ + parseXML (unpack (crFilter inp))) return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 4d6a67b8e..8c47cdaf5 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -124,8 +124,3 @@ instance ChoiceVector SuccessList where spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing where unTagRight (Right x) = (x:) unTagRight _ = id - --- | Like 'catMaybes', but for 'Either'. -collectRights :: [Either _l r] -> [r] -collectRights = collectNonFailing . untag . spreadChoice . SuccessList - where untag = fromLeft (error "Unexpected Left") diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 1c3e08a7f..428048427 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -71,6 +71,7 @@ import Control.Applicative hiding ( liftA, liftA2 ) import Control.Monad ( MonadPlus ) import Control.Arrow +import Data.Either ( rights ) import qualified Data.Map as M import Data.Default import Data.Maybe @@ -604,7 +605,7 @@ tryAll :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [a] tryAll nsID name a = prepareIteration nsID name >>> iterateS (switchingTheStack a) - >>^ collectRights + >>^ rights -------------------------------------------------------------------------------- -- Matching children diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e0d67d10..eaccc251c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) +import Text.Pandoc.Shared (crFilter) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) @@ -51,7 +52,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 66273e05d..42fdfd4dd 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: PandocMonad m => OrgParser m String inlineLaTeXCommand = try $ do rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest + st <- getState + parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest case parsed of - Right (RawInline _ cs) -> do + Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. let cmdNoSpc = dropWhileEnd isSpace cs diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 92f868516..fc98213fb 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState , OrgNoteRecord , HasReaderOptions (..) , HasQuoteContext (..) + , HasMacros (..) , TodoMarker (..) , TodoSequence , TodoState (..) @@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local) import Data.Default (Default (..)) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) import Text.Pandoc.Logging import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), HasIncludeFiles (..), HasLastStrPosition (..), HasLogMessages (..), HasQuoteContext (..), + HasMacros (..), HasReaderOptions (..), ParserContext (..), QuoteContext (..), SourcePos, askF, asksF, returnF, runF, trimInlinesF) @@ -118,6 +122,7 @@ data OrgParserState = OrgParserState , orgStateParserContext :: ParserContext , orgStateTodoSequences :: [TodoSequence] , orgLogMessages :: [LogMessage] + , orgMacros :: M.Map Text Macro } data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } @@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasMacros OrgParserState where + extractMacros st = orgMacros st + updateMacros f st = st{ orgMacros = f (orgMacros st) } + instance HasIncludeFiles OrgParserState where getIncludeFiles = orgStateIncludeFiles addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } @@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState , orgStateParserContext = NullState , orgStateTodoSequences = [] , orgLogMessages = [] + , orgMacros = M.empty } optionsToParserState :: ReaderOptions -> OrgParserState diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index fb5f6f2d4..2daf60a89 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where -import Control.Monad (guard, liftM, mzero, when) +import Control.Monad (guard, liftM, mzero, when, forM_) import Control.Monad.Identity (Identity(..)) import Control.Monad.Except (throwError) import Data.Char (isHexDigit, isSpace, toLower, toUpper) @@ -68,7 +68,7 @@ readRST :: PandocMonad m -> m Pandoc readRST opts s = do parsed <- (readWithM parseRST) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -170,7 +170,8 @@ parseRST = do -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> citationBlock <|> + manyTill (referenceKey <|> anchorDef <|> + noteBlock <|> citationBlock <|> headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos @@ -217,6 +218,7 @@ block = choice [ codeBlock , fieldList , include , directive + , anchor , comment , header , hrule @@ -1054,16 +1056,49 @@ stripTicks = reverse . stripTick . reverse . stripTick where stripTick ('`':xs) = xs stripTick xs = xs +referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames = do + let rn = try $ do + string ".. _" + (_, ref) <- withRaw referenceName + char ':' + return ref + first <- rn + rest <- many (try (blanklines *> rn)) + return (first:rest) + regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' + -- we allow several references to the same URL, e.g. + -- .. _hello: + -- .. _goodbye: url.com + refs <- referenceNames src <- targetURI - let key = toKey $ stripTicks ref + guard $ not (null src) --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ - stateKeys s } + let keys = map (toKey . stripTicks) refs + forM_ keys $ \key -> + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } + +anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef = try $ do + (refs, raw) <- withRaw (try (referenceNames <* blanklines)) + let keys = map stripTicks refs + forM_ keys $ \rawkey -> + updateState $ \s -> s { stateKeys = + M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + -- keep this for 2nd round of parsing, where we'll add the divs (anchor) + return raw + +anchor :: PandocMonad m => RSTParser m Blocks +anchor = try $ do + refs <- referenceNames + blanklines + b <- block + -- put identifier on next block: + let addDiv ref = B.divWith (ref, [], []) + return $ foldr addDiv b refs headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9e544c4ac..d41152de5 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -42,13 +42,13 @@ import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition -import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Shared (crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -59,7 +59,7 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case res of Left e -> throwError e Right d -> return d @@ -133,12 +133,11 @@ parseTWiki = do block :: PandocMonad m => TWParser m B.Blocks block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> blockElements <|> para skipMany blankline - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 1669e3e51..853d2768f 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -61,15 +61,14 @@ import Text.HTML.TagSoup (Tag (..), fromAttrib) import Text.HTML.TagSoup.Match import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.CSS import Text.Pandoc.Definition -import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (trim) +import Text.Pandoc.Shared (trim, crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -80,7 +79,7 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -143,8 +142,7 @@ blockParsers = [ codeBlock block :: PandocMonad m => ParserT [Char] ParserState m Blocks block = do res <- choice blockParsers <?> "block" - pos <- getPosition - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks @@ -575,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - B.singleton <$> rawLaTeXInline + B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 260bb7fff..f000646c2 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -40,8 +40,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) +import Text.Pandoc.Parsing hiding (space, spaces, uri) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter) import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default @@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n") + let parsed = flip runReader meta $ + readWithM parseT2T (def {stateOptions = opts}) $ + T.unpack (crFilter s) ++ "\n\n" case parsed of Right result -> return $ result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs new file mode 100644 index 000000000..52bf37d35 --- /dev/null +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -0,0 +1,673 @@ +{- + Copyright (C) 2017 Yuchen Pei <me@ypei.me> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Vimwiki + Copyright : Copyright (C) 2017 Yuchen Pei + License : GNU GPL, version 2 or above + + Maintainer : Yuchen Pei <me@ypei.me> + Stability : alpha + Portability : portable + +Conversion of vimwiki text to 'Pandoc' document. +-} +{-- +[X]: implemented +[O]: not implemented +* block parsers: + * [X] header + * [X] hrule + * [X] comment + * [X] blockquote + * [X] preformatted -- using codeblock + * [X] displaymath + * [X] bulletlist / orderedlist + * [X] todo lists -- using span. + * [X] table + * [X] centered table -- using div + * [O] colspan and rowspan -- see issue #1024 + * [X] paragraph + * [X] definition list +* inline parsers: + * [X] bareURL + * [X] strong + * [X] emph + * [X] strikeout + * [X] code + * [X] link + * [X] image + * [X] inline math + * [X] tag + * [X] sub- and super-scripts +* misc: + * [X] `TODO:` mark + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- ignored +--} + +module Text.Pandoc.Readers.Vimwiki ( readVimwiki + ) where +import Control.Monad.Except (throwError) +import Control.Monad (guard) +import Data.Default +import Data.Maybe +import Data.Monoid ((<>)) +import Data.List (isInfixOf, isPrefixOf) +import Data.Text (Text, unpack) +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList) +import qualified Text.Pandoc.Builder + as B (headerWith, str, space, strong, emph, strikeout, code, link, image, + spanWith, para, horizontalRule, blockQuote, bulletList, plain, + orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, + setMeta, definitionList, superscript, subscript, displayMath, + math) +import Text.Pandoc.Class (PandocMonad(..)) +import Text.Pandoc.Definition (Pandoc(..), Inline(Space), + Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), + ListNumberDelim(..)) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, + stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, + orderedListMarker, many1Till) +import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter) +import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, + alphaNum) +import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, + notFollowedBy, option) +import Text.Parsec.Prim (many, try, updateState, getState) +import Text.Parsec.Char (oneOf, space) +import Text.Parsec.Combinator (lookAhead, between) +import Text.Parsec.Prim ((<|>)) + +readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki opts s = do + res <- readWithM parseVimwiki def{ stateOptions = opts } + (unpack (crFilter s)) + case res of + Left e -> throwError e + Right result -> return result + +type VwParser = ParserT [Char] ParserState + + +-- constants + +specialChars :: [Char] +specialChars = "=*-#[]_~{}`$|:%^," + +spaceChars :: [Char] +spaceChars = " \t\n" + +-- main parser + +parseVimwiki :: PandocMonad m => VwParser m Pandoc +parseVimwiki = do + bs <- mconcat <$> many block + spaces + eof + st <- getState + let meta = runF (stateMeta' st) st + return $ Pandoc meta (toList bs) + +-- block parser + +block :: PandocMonad m => VwParser m Blocks +block = do + res <- choice [ mempty <$ blanklines + , header + , hrule + , mempty <$ comment + , mixedList + , preformatted + , displayMath + , table + , mempty <$ placeholder + , blockQuote + , definitionList + , para + ] + trace (take 60 $ show $ toList res) + return res + +blockML :: PandocMonad m => VwParser m Blocks +blockML = choice [preformatted, displayMath, table] + +header :: PandocMonad m => VwParser m Blocks +header = try $ do + sp <- many spaceChar + eqs <- many1 (char '=') + spaceChar + let lev = length eqs + guard $ lev <= 6 + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> (string eqs) >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, + (if sp == "" then [] else ["justcenter"]), []) contents + return $ B.headerWith attr lev contents + +para :: PandocMonad m => VwParser m Blocks +para = try $ do + contents <- trimInlines . mconcat <$> many1 inline + if all (==Space) (toList contents) + then return mempty + else return $ B.para contents + +hrule :: PandocMonad m => VwParser m Blocks +hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline) + +comment :: PandocMonad m => VwParser m () +comment = try $ do + many spaceChar >> string "%%" >> many (noneOf "\n") + return () + +blockQuote :: PandocMonad m => VwParser m Blocks +blockQuote = try $ do + string " " + contents <- trimInlines . mconcat <$> many1 inlineBQ + if all (==Space) (toList contents) + then return mempty + else return $ B.blockQuote $ B.plain contents + +definitionList :: PandocMonad m => VwParser m Blocks +definitionList = try $ + B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT)) + +dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithDT = do + dt <- definitionTerm + dds <- many definitionDef + return $ (dt, dds) + +dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithoutDT = do + dds <- many1 definitionDef + return $ (mempty, dds) + +definitionDef :: PandocMonad m => VwParser m Blocks +definitionDef = try $ + (notFollowedBy definitionTerm) >> many spaceChar + >> (definitionDef1 <|> definitionDef2) + +definitionDef1 :: PandocMonad m => VwParser m Blocks +definitionDef1 = try $ mempty <$ defMarkerE + +definitionDef2 :: PandocMonad m => VwParser m Blocks +definitionDef2 = try $ B.plain <$> + (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) + + +definitionTerm :: PandocMonad m => VwParser m Inlines +definitionTerm = try $ do + x <- definitionTerm1 <|> definitionTerm2 + guard $ (stringify x /= "") + return x + +definitionTerm1 :: PandocMonad m => VwParser m Inlines +definitionTerm1 = try $ + trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE) + +definitionTerm2 :: PandocMonad m => VwParser m Inlines +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' + (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM)) + +defMarkerM :: PandocMonad m => VwParser m Char +defMarkerM = string "::" >> spaceChar + +defMarkerE :: PandocMonad m => VwParser m Char +defMarkerE = string "::" >> newline + +hasDefMarkerM :: PandocMonad m => VwParser m String +hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) + +preformatted :: PandocMonad m => VwParser m Blocks +preformatted = try $ do + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") + lookAhead newline + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + >> many spaceChar >> newline)) + if (not $ contents == "") && (head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + else return $ B.codeBlockWith (makeAttr attrText) contents + +makeAttr :: String -> Attr +makeAttr s = + let xs = splitBy (`elem` " \t") s in + ("", [], catMaybes $ map nameValue xs) + +nameValue :: String -> Maybe (String, String) +nameValue s = + let t = splitBy (== '=') s in + if length t /= 2 + then Nothing + else let (a, b) = (head t, last t) in + if ((length b) < 2) || ((head b, last b) /= ('"', '"')) + then Nothing + else Just (a, stripFirstAndLast b) + + +displayMath :: PandocMonad m => VwParser m Blocks +displayMath = try $ do + many spaceChar >> string "{{$" + mathTag <- option "" mathTagParser + many space + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + >> many spaceChar >> newline)) + let contentsWithTags + | mathTag == "" = contents + | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents + ++ "\n\\end{" ++ mathTag ++ "}" + return $ B.para $ B.displayMath contentsWithTags + + +mathTagLaTeX :: String -> String +mathTagLaTeX s = case s of + "equation" -> "" + "equation*" -> "" + "gather" -> "gathered" + "gather*" -> "gathered" + "multline" -> "gathered" + "multline*" -> "gathered" + "eqnarray" -> "aligned" + "eqnarray*" -> "aligned" + "align" -> "aligned" + "align*" -> "aligned" + "alignat" -> "aligned" + "alignat*" -> "aligned" + _ -> s + + +mixedList :: PandocMonad m => VwParser m Blocks +mixedList = try $ do + (bl, _) <- mixedList' (-1) + return $ head bl + +mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int) +mixedList' prevInd = do + (curInd, builder) <- option (-1, "na") (lookAhead listStart) + if curInd < prevInd + then return ([], curInd) + else do + listStart + curLine <- listItemContent + let listBuilder = + if builder == "ul" then B.bulletList else B.orderedList + (subList, lowInd) <- (mixedList' curInd) + if lowInd >= curInd + then do + (sameIndList, endInd) <- (mixedList' lowInd) + let curList = (combineList curLine subList) ++ sameIndList + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + else do + let (curList, endInd) = ((combineList curLine subList), + lowInd) + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + +plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks +plainInlineML' w = do + xs <- many inlineML + newline + return $ B.plain $ trimInlines $ mconcat $ w:xs + +plainInlineML :: PandocMonad m => VwParser m Blocks +plainInlineML = (notFollowedBy listStart) >> spaceChar >> plainInlineML' mempty + + +listItemContent :: PandocMonad m => VwParser m Blocks +listItemContent = try $ do + w <- option mempty listTodoMarker + x <- plainInlineML' w + y <- many blocksThenInline + z <- many blockML + return $ mconcat $ x:y ++ z + +blocksThenInline :: PandocMonad m => VwParser m Blocks +blocksThenInline = try $ do + y <- many1 blockML + x <- plainInlineML + return $ mconcat $ y ++ [x] + +listTodoMarker :: PandocMonad m => VwParser m Inlines +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) + (oneOf " .oOX") + return $ makeListMarkerSpan x + +makeListMarkerSpan :: Char -> Inlines +makeListMarkerSpan x = + let cl = case x of + ' ' -> "done0" + '.' -> "done1" + 'o' -> "done2" + 'O' -> "done3" + 'X' -> "done4" + _ -> "" + in + B.spanWith ("", [cl], []) mempty + +combineList :: Blocks -> [Blocks] -> [Blocks] +combineList x [y] = case toList y of + [BulletList z] -> [fromList $ (toList x) + ++ [BulletList z]] + [OrderedList attr z] -> [fromList $ (toList x) + ++ [OrderedList attr z]] + _ -> x:[y] +combineList x xs = x:xs + +listStart :: PandocMonad m => VwParser m (Int, String) +listStart = try $ do + s <- many spaceChar + listType <- bulletListMarkers <|> orderedListMarkers + spaceChar + return (length s, listType) + +bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers = "ul" <$ (char '*' <|> char '-') + +orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers = + ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) + <$> orderedListMarker + <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) + <|> ("ol" <$ char '#') + +--many need trimInlines +table :: PandocMonad m => VwParser m Blocks +table = try $ do + indent <- lookAhead (many spaceChar) + (th, trs) <- table1 <|> table2 + let tab = B.simpleTable th trs + if indent == "" + then return tab + else return $ B.divWith ("", ["center"], []) tab + +-- table with header +table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table1 = try $ do + th <- tableRow + many1 tableHeaderSeparator + trs <- many tableRow + return (th, trs) + +-- headerless table +table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table2 = try $ do + trs <- many1 tableRow + return (take (length $ head trs) $ repeat mempty, trs) + +tableHeaderSeparator :: PandocMonad m => VwParser m () +tableHeaderSeparator = try $ do + many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') + >> many spaceChar >> newline + return () + +tableRow :: PandocMonad m => VwParser m [Blocks] +tableRow = try $ do + many spaceChar >> char '|' + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + >> newline)) + guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") + tr <- many tableCell + many spaceChar >> char '\n' + return tr + +tableCell :: PandocMonad m => VwParser m Blocks +tableCell = try $ + B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|')) + +placeholder :: PandocMonad m => VwParser m () +placeholder = try $ + (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh + +ph :: PandocMonad m => String -> VwParser m () +ph s = try $ do + many spaceChar >> (string $ '%':s) >> spaceChar + contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) + --use lookAhead because of placeholder in the whitespace parser + let meta' = return $ B.setMeta s contents nullMeta :: F Meta + updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } + +noHtmlPh :: PandocMonad m => VwParser m () +noHtmlPh = try $ + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + >> (lookAhead newline)) + +templatePh :: PandocMonad m => VwParser m () +templatePh = try $ + () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") + >> (lookAhead newline)) + +-- inline parser + +inline :: PandocMonad m => VwParser m Inlines +inline = choice $ (whitespace endlineP):inlineList + +inlineList :: PandocMonad m => [VwParser m Inlines] +inlineList = [ bareURL + , todoMark + , str + , strong + , emph + , strikeout + , code + , link + , image + , inlineMath + , tag + , superscript + , subscript + , special + ] + +-- inline parser without softbreaks or comment breaks +inline' :: PandocMonad m => VwParser m Inlines +inline' = choice $ whitespace':inlineList + +-- inline parser for blockquotes +inlineBQ :: PandocMonad m => VwParser m Inlines +inlineBQ = choice $ (whitespace endlineBQ):inlineList + +-- inline parser for mixedlists +inlineML :: PandocMonad m => VwParser m Inlines +inlineML = choice $ (whitespace endlineML):inlineList + +str :: PandocMonad m => VwParser m Inlines +str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars) + +whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines +whitespace endline = B.space <$ (skipMany1 spaceChar <|> + (try (newline >> (comment <|> placeholder)))) + <|> B.softbreak <$ endline + +whitespace' :: PandocMonad m => VwParser m Inlines +whitespace' = B.space <$ skipMany1 spaceChar + +special :: PandocMonad m => VwParser m Inlines +special = B.str <$> count 1 (oneOf specialChars) + +bareURL :: PandocMonad m => VwParser m Inlines +bareURL = try $ do + (orig, src) <- uri <|> emailAddress + return $ B.link src "" (B.str orig) + +strong :: PandocMonad m => VwParser m Inlines +strong = try $ do + s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") + guard $ (not $ (head s) `elem` spaceChars) + && (not $ (last s) `elem` spaceChars) + char '*' + contents <- mconcat <$> (manyTill inline' $ char '*' + >> notFollowedBy alphaNum) + return $ (B.spanWith ((makeId contents), [], []) mempty) + <> (B.strong contents) + +makeId :: Inlines -> String +makeId i = concat (stringify <$> (toList i)) + +emph :: PandocMonad m => VwParser m Inlines +emph = try $ do + s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") + guard $ (not $ (head s) `elem` spaceChars) + && (not $ (last s) `elem` spaceChars) + char '_' + contents <- mconcat <$> (manyTill inline' $ char '_' + >> notFollowedBy alphaNum) + return $ B.emph contents + +strikeout :: PandocMonad m => VwParser m Inlines +strikeout = try $ do + string "~~" + contents <- mconcat <$> (many1Till inline' $ string $ "~~") + return $ B.strikeout contents + +code :: PandocMonad m => VwParser m Inlines +code = try $ do + char '`' + contents <- many1Till (noneOf "\n") (char '`') + return $ B.code contents + +superscript :: PandocMonad m => VwParser m Inlines +superscript = try $ + B.superscript <$> mconcat <$> (char '^' >> many1Till inline' (char '^')) + +subscript :: PandocMonad m => VwParser m Inlines +subscript = try $ + B.subscript <$> mconcat <$> (string ",," + >> many1Till inline' (try $ string ",,")) + +link :: PandocMonad m => VwParser m Inlines +link = try $ do + string "[[" + contents <- lookAhead $ manyTill anyChar (string "]]") + case '|' `elem` contents of + False -> do + manyTill anyChar (string "]]") +-- not using try here because [[hell]o]] is not rendered as a link in vimwiki + return $ B.link (procLink contents) "" (B.str contents) + True -> do + url <- manyTill anyChar $ char '|' + lab <- mconcat <$> (manyTill inline $ string "]]") + return $ B.link (procLink url) "" lab + +image :: PandocMonad m => VwParser m Inlines +image = try $ do + string "{{" + contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") + images $ length $ filter (== '|') contentText + +images :: PandocMonad m => Int -> VwParser m Inlines +images k + | k == 0 = do + imgurl <- manyTill anyChar (try $ string "}}") + return $ B.image (procImgurl imgurl) "" (B.str "") + | k == 1 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ (try $ string "}}")) + return $ B.image (procImgurl imgurl) "" alt + | k == 2 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ char '|') + attrText <- manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + | otherwise = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ char '|') + attrText <- manyTill anyChar (char '|') + manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + +procLink' :: String -> String +procLink' s + | ((take 6 s) == "local:") = "file" ++ (drop 5 s) + | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html" + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) + = s + | s == "" = "" + | (last s) == '/' = s + | otherwise = s ++ ".html" + +procLink :: String -> String +procLink s = procLink' x ++ y + where (x, y) = break (=='#') s + +procImgurl :: String -> String +procImgurl s = if ((take 6 s) == "local:") then "file" ++ (drop 5 s) else s + +inlineMath :: PandocMonad m => VwParser m Inlines +inlineMath = try $ do + char '$' + contents <- many1Till (noneOf "\n") (char '$') + return $ B.math contents + +tag :: PandocMonad m => VwParser m Inlines +tag = try $ do + char ':' + s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space))) + guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") + let ss = splitBy (==':') s + return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) + +todoMark :: PandocMonad m => VwParser m Inlines +todoMark = try $ do + string "TODO:" + return $ B.spanWith ("", ["todo"], []) (B.str "TODO:") + +-- helper functions and parsers +endlineP :: PandocMonad m => VwParser m () +endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote) + +endlineBQ :: PandocMonad m => VwParser m () +endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") + +endlineML :: PandocMonad m => VwParser m () +endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) + +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +nFBTTBSB :: PandocMonad m => VwParser m () +nFBTTBSB = + notFollowedBy newline <* + notFollowedBy hrule <* + notFollowedBy tableRow <* + notFollowedBy header <* + notFollowedBy listStart <* + notFollowedBy preformatted <* + notFollowedBy displayMath <* + notFollowedBy hasDefMarker + +hasDefMarker :: PandocMonad m => VwParser m () +hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)) + +makeTagSpan' :: String -> Inlines +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> + B.spanWith (s, ["tag"], []) (B.str s) + +makeTagSpan :: String -> Inlines +makeTagSpan s = (B.space) <> (makeTagSpan' s) + +mathTagParser :: PandocMonad m => VwParser m String +mathTagParser = do + s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) + (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) + char '%' >> string s >> char '%' + return $ mathTagLaTeX s diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 745e809d0..53fd38ffd 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -49,6 +49,7 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, + crFilter, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -117,7 +118,7 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Error (PandocError(..)) import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) -import qualified Control.Monad.State as S +import qualified Control.Monad.State.Strict as S import qualified Control.Exception as E import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) @@ -279,13 +280,12 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. +-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop -> T.Text -- ^ Input -> T.Text -tabFilter tabStop = T.filter (/= '\r') . T.unlines . - (if tabStop == 0 then id else map go) . T.lines +tabFilter 0 = id +tabFilter tabStop = T.unlines . map go . T.lines where go s = let (s1, s2) = T.break (== '\t') s in if T.null s2 @@ -294,6 +294,10 @@ tabFilter tabStop = T.filter (/= '\r') . T.unlines . (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") <> go (T.drop 1 s2) +-- | Strip out DOS line endings. +crFilter :: T.Text -> T.Text +crFilter = T.filter (/= '\r') + -- -- Date/time -- diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 9b635a97b..1a26b7168 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -33,20 +33,20 @@ A simple templating system with variable substitution and conditionals. -} -module Text.Pandoc.Templates ( renderTemplate +module Text.Pandoc.Templates ( module Text.DocTemplates , renderTemplate' - , TemplateTarget - , varListToJSON - , compileTemplate - , Template - , getDefaultTemplate ) where + , getDefaultTemplate + ) where import qualified Control.Exception as E (IOException, try) +import Control.Monad.Except (throwError) import Data.Aeson (ToJSON (..)) import qualified Data.Text as T import System.FilePath ((<.>), (</>)) import Text.DocTemplates (Template, TemplateTarget, applyTemplate, compileTemplate, renderTemplate, varListToJSON) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Error import Text.Pandoc.Shared (readDataFileUTF8) -- | Get default template for the specified writer. @@ -72,7 +72,11 @@ getDefaultTemplate user writer = do _ -> let fname = "templates" </> "default" <.> format in E.try $ readDataFileUTF8 user fname --- | Like 'applyTemplate', but raising an error if compilation fails. -renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b -renderTemplate' template = either error id . applyTemplate (T.pack template) - +-- | Like 'applyTemplate', but runs in PandocMonad and +-- raises an error if compilation fails. +renderTemplate' :: (PandocMonad m, ToJSON a, TemplateTarget b) + => String -> a -> m b +renderTemplate' template context = do + case applyTemplate (T.pack template) context of + Left e -> throwError (PandocTemplateError e) + Right r -> return r diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index dbe55449f..6dfc1a7b3 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -176,19 +176,16 @@ writers = [ ,("muse" , TextWriter writeMuse) ] -getWriter :: PandocMonad m => String -> Either String (Writer m) +-- | Retrieve writer, extensions based on formatSpec (format+extensions). +getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions) getWriter s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] Right (writerName, setExts) -> case lookup writerName writers of Nothing -> Left $ "Unknown writer: " ++ writerName - Just (TextWriter r) -> Right $ TextWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (ByteStringWriter r) -> Right $ ByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } + Just r -> Right (r, setExts $ + getDefaultExtensions writerName) writeJSON :: WriterOptions -> Pandoc -> Text writeJSON _ = UTF8.toText . BL.toStrict . encode diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 46dbe6eaf..112f8b657 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -37,7 +37,7 @@ that it has omitted the construct. AsciiDoc: <http://www.methods.co.nz/asciidoc/> -} module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Aeson (Result (..), Value (String), fromJSON, toJSON) import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse, stripPrefix) @@ -105,7 +105,7 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do $ metadata' case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for AsciiDoc. escapeString :: String -> String diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index ed316ced9..63249a7ce 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -32,7 +32,7 @@ CommonMark: <http://commonmark.org> module Text.Pandoc.Writers.CommonMark (writeCommonMark) where import CMark -import Control.Monad.State (State, get, modify, runState) +import Control.Monad.State.Strict (State, get, modify, runState) import Data.Foldable (foldrM) import Data.Text (Text) import qualified Data.Text as T @@ -58,9 +58,9 @@ writeCommonMark opts (Pandoc meta blocks) = do (inlinesToCommonMark opts) meta let context = defField "body" main $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 2da6a7f9a..3c901cab6 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2007-2017 John MacFarlane <jgm@berkeley.edu> @@ -29,12 +30,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into ConTeXt. -} module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (ord) import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) import Data.Text (Text) import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -88,6 +90,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do ,("top","margin-top") ,("bottom","margin-bottom") ] + mblang <- fromBCP47 (getLang options meta) let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -100,14 +103,17 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) + $ maybe id (defField "context-lang") mblang + $ (case getField "papersize" metadata of + Just ("a4" :: String) -> resetField "papersize" + ("A4" :: String) + _ -> id) $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + let context' = defField "context-dir" (toContextDir + $ getField "dir" context) context + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' toContextDir :: Maybe String -> String toContextDir (Just "rtl") = "r2l" @@ -186,6 +192,7 @@ blockToConTeXt b@(RawBlock _ _ ) = do return empty blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + mblang <- fromBCP47 (lookup "lang" kvs) let wrapRef txt = if null ident then txt else ("\\reference" <> brackets (text $ toLabel ident) <> @@ -194,9 +201,9 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do Just "rtl" -> align "righttoleft" Just "ltr" -> align "lefttoright" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + <> text lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs @@ -416,12 +423,13 @@ inlineToConTeXt (Note contents) = do else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do + mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + wrapLang txt = case mblang of + Just lng -> "\\start\\language[" <> text lng <> "]" <> txt <> "\\stop " Nothing -> txt fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils @@ -458,36 +466,34 @@ sectionHeader (ident,classes,_) hdrLevel lst = do <> blankline _ -> contents <> blankline -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') +fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l +fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" +fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" +fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" +fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" +fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" +fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" +fromBCP47' (Just (Lang l _ _ _) ) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 1314ef844..363bad99b 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -46,6 +46,7 @@ import Data.Typeable import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua +import Text.Pandoc.Error import Text.Pandoc.Lua.Compat ( loadstring ) import Text.Pandoc.Lua.Util ( addValue ) import Text.Pandoc.Lua.SharedInstances () @@ -141,8 +142,10 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do let body = rendered case writerTemplate opts of Nothing -> return $ pack body - Just tpl -> return $ pack $ - renderTemplate' tpl $ setField "body" body context + Just tpl -> + case applyTemplate (pack tpl) $ setField "body" body context of + Left e -> throw (PandocTemplateError e) + Right r -> return (pack r) docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String docToCustom lua opts (Pandoc (Meta metamap) blocks) = do diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 02ffbf831..9db9a0102 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -124,9 +124,9 @@ writeDocbook opts (Pandoc meta blocks) = do MathML -> True _ -> False) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc @@ -217,8 +217,10 @@ blockToDocbook opts (Div (ident,_,_) bs) = do (if null ident then mempty else selfClosingTag "anchor" [("id", ident)]) $$ contents -blockToDocbook _ (Header _ _ _) = - return empty -- should not occur after hierarchicalize +blockToDocbook _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 63bb8a5ae..fb6b2013a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -37,7 +37,7 @@ import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except (catchError) import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -68,6 +68,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, renderLang, toLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML @@ -257,12 +258,11 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles - let lang = case lookupMeta "lang" meta of - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing + mblang <- toLang $ getLang opts meta let addLang :: Element -> Element - addLang e = case lang >>= \l -> (return . XMLC.toTree . go l . XMLC.fromElement) e of + addLang e = case mblang >>= \l -> + (return . XMLC.toTree . go (renderLang l) + . XMLC.fromElement) e of Just (Elem e') -> e' _ -> e -- return original where go :: String -> Cursor -> Cursor @@ -657,6 +657,9 @@ mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] +maxListLevel :: Int +maxListLevel = 8 + mkNum :: ListMarker -> Int -> Element mkNum marker numid = mknode "w:num" [("w:numId",show numid)] @@ -666,7 +669,8 @@ mkNum marker numid = BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] + $ mknode "w:startOverride" [("w:val",show start)] ()) + [0..maxListLevel] mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element mkAbstractNum marker = do @@ -675,7 +679,8 @@ mkAbstractNum marker = do return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () - : map (mkLvl marker) [0..6] + : map (mkLvl marker) + [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = @@ -706,7 +711,7 @@ mkLvl marker lvl = bulletFor 3 = "\x2013" bulletFor 4 = "\x2022" bulletFor 5 = "\x2013" - bulletFor _ = "\x2022" + bulletFor x = bulletFor (x `mod` 6) styleFor UpperAlpha _ = "upperLetter" styleFor LowerAlpha _ = "lowerLetter" styleFor UpperRoman _ = "upperRoman" @@ -718,6 +723,7 @@ mkLvl marker lvl = styleFor DefaultStyle 4 = "decimal" styleFor DefaultStyle 5 = "lowerLetter" styleFor DefaultStyle 6 = "lowerRoman" + styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 7) styleFor _ _ = "decimal" patternFor OneParen s = s ++ ")" patternFor TwoParens s = "(" ++ s ++ ")" diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 551a1b0b5..ad8689e8c 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -41,7 +41,7 @@ DokuWiki: <https://www.dokuwiki.org/dokuwiki> module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where import Control.Monad (zipWithM) import Control.Monad.Reader (ReaderT, ask, local, runReaderT) -import Control.Monad.State (StateT, evalStateT, gets, modify) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, intersect, isPrefixOf, transpose) import Data.Text (Text, pack) @@ -103,7 +103,7 @@ pandocToDokuWiki opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for DokuWiki. escapeString :: String -> String diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d68283007..a48fcf415 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -34,14 +34,14 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, fromArchive, fromEntry, toEntry) -import Control.Monad (mplus, when, zipWithM) +import Control.Monad (mplus, when, unless, zipWithM) import Control.Monad.Except (catchError, throwError) -import Control.Monad.State (State, StateT, evalState, evalStateT, get, gets, +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Data.Text.Lazy as TL -import Data.Char (isAlphaNum, isDigit, toLower) +import Data.Char (isAlphaNum, isDigit, toLower, isAscii) import Data.List (intercalate, isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (catMaybes, fromMaybe) @@ -103,6 +103,7 @@ data EPUBMetadata = EPUBMetadata{ , epubCoverImage :: Maybe String , epubStylesheets :: [FilePath] , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] } deriving Show data Date = Date{ @@ -312,6 +313,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverImage = coverImage , epubStylesheets = stylesheets , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -339,6 +341,10 @@ metadataFromMeta opts meta = EPUBMetadata{ Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing + ibooksFields = case lookupMeta "ibooks" meta of + Just (MetaMap mp) + -> M.toList $ M.map metaValueToString mp + _ -> [] -- | Produce an EPUB2 file from a Pandoc document. writeEPUB2 :: PandocMonad m @@ -361,8 +367,7 @@ writeEPUB :: PandocMonad m -> Pandoc -- ^ Document to convert -> m B.ByteString writeEPUB epubVersion opts doc = - let initState = EPUBState { stMediaPaths = [] - } + let initState = EPUBState { stMediaPaths = [] } in evalStateT (pandocToEPUB epubVersion opts doc) initState @@ -373,6 +378,10 @@ pandocToEPUB :: PandocMonad m -> Pandoc -> E m B.ByteString pandocToEPUB version opts doc@(Pandoc meta _) = do + let epubSubdir = writerEpubSubdirectory opts + -- sanity check on epubSubdir + unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + throwError $ PandocEpubSubdirectoryError epubSubdir let epub3 = version == EPUB3 let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . writeHtmlStringForEPUB version o @@ -383,14 +392,15 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do -- stylesheet stylesheets <- case epubStylesheets metadata of [] -> (\x -> [B.fromChunks [x]]) <$> - P.readDataFile (writerUserDataDir opts) "epub.css" + P.readDataFile (writerUserDataDir opts) + "epub.css" fs -> mapM P.readFileLazy fs let stylesheetEntries = zipWith - (\bs n -> mkEntry ("stylesheet" ++ show n ++ ".css") bs) + (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) stylesheets [(1 :: Int)..] let vars = ("epub3", if epub3 then "true" else "false") - : map (\e -> ("css", eRelativePath e)) stylesheetEntries + : map (\e -> ("css", "../" ++ eRelativePath e)) stylesheetEntries ++ [(x,y) | (x,y) <- writerVariables opts, x /= "css"] let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True @@ -418,7 +428,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do tpContent <- lift $ writeHtml opts'{ writerVariables = ("titlepage","true"):vars } (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent + let tpEntry = mkEntry "text/title_page.xhtml" tpContent -- handle pictures -- mediaRef <- P.newIORef [] @@ -431,7 +441,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do when (null xs) $ report $ CouldNotFetchResource f "glob did not match any font files" return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f) + let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) <$> + lift (P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -516,7 +527,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do chapters' let chapToEntry num (Chapter mbnum bs) = - mkEntry (showChapter num) <$> + mkEntry ("text/" ++ showChapter num) <$> (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> @@ -572,7 +583,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ + ,("unique-identifier","epub-id-1") + ,("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/")] $ [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") @@ -648,12 +660,12 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () + , unode "content" ! [("src", "text/" ++ src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] + , unode "content" ! [("src","text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ @@ -681,7 +693,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] + (unode "a" ! [("href", "text/" ++ + src)] $ tit) : case subs of [] -> [] @@ -714,7 +727,11 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do ] ] else [] - navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"): + -- remove the leading ../ from stylesheet paths: + map (\(k,v) -> if k == "css" + then (k, drop 3 v) + else (k, v)) vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) @@ -728,7 +745,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") + unode "rootfile" ! [("full-path", + epubSubdir ++ ['/' | not (null epubSubdir)] ++ "content.opf") ,("media-type","application/oebps-package+xml")] $ () let containerEntry = mkEntry "META-INF/container.xml" containerData @@ -739,10 +757,14 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple + let addEpubSubdir :: Entry -> Entry + addEpubSubdir e = e{ eRelativePath = + epubSubdir ++ ['/' | not (null epubSubdir)] ++ eRelativePath e } -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : tpEntry : - contentsEntry : tocEntry : navEntry : + let archive = foldr addEntryToArchive emptyArchive $ + [mimetypeEntry, containerEntry, appleEntry] ++ + map addEpubSubdir + (tpEntry : contentsEntry : tocEntry : navEntry : (stylesheetEntries ++ picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive @@ -751,7 +773,8 @@ metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element metadataElement version md currentTime = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes + ++ languageNodes ++ ibooksNodes ++ creatorNodes ++ contributorNodes ++ subjectNodes ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes @@ -770,6 +793,8 @@ metadataElement version md currentTime = [] -> [] (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] + ibooksNodes = map ibooksNode (epubIbooksFields md) + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -883,10 +908,10 @@ modifyMediaRef opts oldsrc = do Nothing -> catchError (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) epochtime <- floor `fmap` lift P.getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + let entry = toEntry new epochtime (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (new, Just entry)):media} return new) @@ -913,12 +938,13 @@ transformInline :: PandocMonad m -> E m Inline transformInline opts (Image attr lab (src,tit)) = do newsrc <- modifyMediaRef opts src - return $ Image attr lab (newsrc, tit) + return $ Image attr lab ("../" ++ newsrc, tit) transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do newsrc <- modifyMediaRef opts (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] + return $ Span ("",["math",mathclass],[]) + [Image nullAttr [x] ("../" ++ newsrc, "")] transformInline opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 213756330..4c764d987 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -37,9 +37,9 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.Except (catchError, throwError) -import Control.Monad.State (StateT, evalStateT, get, lift, modify) -import Control.Monad.State (liftM) +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict (StateT, evalStateT, get, lift, modify) +import Control.Monad.State.Strict (liftM) import Data.ByteString.Base64 (encode) import qualified Data.ByteString.Char8 as B8 import Data.Char (isAscii, isControl, isSpace, toLower) @@ -54,7 +54,6 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara, @@ -371,8 +370,10 @@ blockToXml (DefinitionList defs) = needsBreak (Para _) = False needsBreak (Plain ins) = LineBreak `notElem` ins needsBreak _ = True -blockToXml (Header _ _ _) = -- should never happen, see renderSections - throwError $ PandocShouldNeverHappenError "unexpected header in section text" +blockToXml h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return [] blockToXml HorizontalRule = return [ el "empty-line" () , el "p" (txt (replicate 10 '—')) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 5ee8ab4ce..451123a6d 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -43,7 +43,7 @@ module Text.Pandoc.Writers.HTML ( writeDZSlides, writeRevealJs ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (ord, toLower) import Data.Text (Text) import qualified Data.Text.Lazy as TL @@ -210,7 +210,7 @@ writeHtmlString' st opts d = do lookup "sourcefile" (writerVariables opts) report $ NoTitleElement fallback return $ resetField "pagetitle" fallback context - return $ renderTemplate' tpl $ + renderTemplate' tpl $ defField "body" (renderHtml' body) context' writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html @@ -241,7 +241,7 @@ pandocToHtml opts (Pandoc meta blocks) = do then blocks else prepSlides slideLevel blocks toc <- if writerTableOfContents opts && slideVariant /= S5Slides - then tableOfContents opts sects + then fmap renderHtml' <$> tableOfContents opts sects else return Nothing blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects @@ -253,7 +253,9 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - MathJax url -> + MathJax url + | slideVariant /= RevealJsSlides -> + -- mathjax is handled via a special plugin in revealjs H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ case slideVariant of @@ -285,8 +287,16 @@ pandocToHtml opts (Pandoc meta blocks) = do (if stMath st then defField "math" (renderHtml' math) else id) $ + defField "mathjax" + (case writerHTMLMathMethod opts of + MathJax _ -> True + _ -> False) $ defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml') toc $ + -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + maybe id (defField "toc") toc $ + maybe id (defField "table-of-contents") toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ defField "pagetitle" (stringifyHTML (docTitle meta)) $ @@ -597,7 +607,8 @@ blockToHtml opts (Para lst) contents <- inlineListToHtml opts lst return $ H.p contents where - isEmptyRaw [RawInline f _] = f /= (Format "html") + isEmptyRaw [RawInline f _] = f `notElem` [Format "html", + Format "html4", Format "html5"] isEmptyRaw _ = False blockToHtml opts (LineBlock lns) = if writerWrapText opts == WrapNone @@ -626,14 +637,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts (ident, classes', kvs) $ divtag $ contents' -blockToHtml opts (RawBlock f str) - | f == Format "html" = return $ preEscapedString str - | (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] - | otherwise = do - report $ BlockNotRendered (RawBlock f str) - return mempty +blockToHtml opts (RawBlock f str) = do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then blockToHtml opts $ Plain [Math DisplayMath str] + else do + report $ BlockNotRendered (RawBlock f str) + return mempty blockToHtml _ (HorizontalRule) = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr @@ -971,11 +985,13 @@ inlineToHtml opts inline = do return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag - (RawInline f str) - | f == Format "html" -> return $ preEscapedString str - | otherwise -> do - report $ InlineNotRendered inline - return mempty + (RawInline f str) -> do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else do + report $ InlineNotRendered inline + return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt lift $ obfuscateLink opts attr linkText s @@ -1123,3 +1139,9 @@ allowsMathEnvironments (MathJax _) = True allowsMathEnvironments (MathML) = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False + +isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool +isRawHtml f = do + html5 <- gets stHtml5 + return $ f == Format "html" || + ((html5 && f == Format "html5") || f == Format "html4") diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1ad9acd40..d1146ca73 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to haddock markup. Haddock: <http://www.haskell.org/haddock/doc/html/> -} module Text.Pandoc.Writers.Haddock (writeHaddock) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Default import Data.Text (Text) import Data.List (intersperse, transpose) @@ -80,7 +80,7 @@ pandocToHaddock opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return haddock representation of notes. notesToHaddock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 2884bc532..37df58e65 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -17,7 +17,7 @@ into InDesign with File -> Place. -} module Text.Pandoc.Writers.ICML (writeICML) where import Control.Monad.Except (catchError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) import qualified Data.Set as Set import Data.Text as Text (breakOnAll, pack) @@ -147,9 +147,9 @@ writeICML opts (Pandoc meta blocks) = do $ defField "parStyles" (render' $ parStylesToDoc st) $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Auxilary functions for parStylesToDoc and charStylesToDoc. contains :: String -> (String, (String, String)) -> [(String, String)] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 1a8d80747..012ff8416 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -128,9 +128,9 @@ docToJATS opts (Pandoc meta blocks) = do MathML -> True _ -> False) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to JATS. elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc @@ -203,8 +203,10 @@ blockToJATS opts (Div (ident,_,kvs) bs) = do [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True "boxed-text" attr contents -blockToJATS _ (Header _ _ _) = - return empty -- should not occur after hierarchicalize +blockToJATS _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- No Plain, everything needs to be in a block-level tag blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) -- title beginning with fig: indicates that the image is a figure diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 80606d510..55ecda819 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -35,16 +35,17 @@ module Text.Pandoc.Writers.LaTeX ( , writeBeamer ) where import Control.Applicative ((<|>)) -import Control.Monad.State +import Control.Monad.State.Strict import Data.Aeson (FromJSON, object, (.=)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, toLower) -import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy, +import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, stripPrefix, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 (Lang(..), toLang, getLang, renderLang) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, @@ -188,7 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let docLangs = nub $ query (extract "lang") blocks + docLangs <- catMaybes <$> + mapM (toLang . Just) (ordNub (query (extract "lang") blocks)) let hasStringValue x = isJust (getField x metadata :: Maybe String) let geometryFromMargins = intercalate [','] $ catMaybes $ map (\(x,y) -> @@ -198,6 +200,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do ,("tmargin","margin-top") ,("bmargin","margin-bottom") ] + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + mblang <- toLang $ case getLang options meta of + Just l -> Just l + Nothing | null docLangs -> Nothing + | otherwise -> Just "en" + -- we need a default here since lang is used in template conditionals + + let dirs = query (extract "dir") blocks + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -235,26 +249,24 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ - -- set lang to something so polyglossia/babel is included - defField "lang" (if null docLangs then ""::String else "en") $ - defField "otherlangs" docLangs $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ - defField "dir" (if (null $ query (extract "dir") blocks) - then ""::String - else "ltr") $ + (if null dirs + then id + else defField "dir" ("ltr" :: String)) $ defField "section-titles" True $ defField "geometry" geometryFromMargins $ + (case getField "papersize" metadata of + Just ("A4" :: String) -> resetField "papersize" + ("a4" :: String) + _ -> id) $ metadata - let toPolyObj lang = object [ "name" .= T.pack name - , "options" .= T.pack opts ] - where - (name, opts) = toPolyglossia lang - let lang = maybe [] (splitBy (=='-')) $ getField "lang" context - otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context let context' = - defField "babel-lang" (toBabel lang) - $ defField "babel-otherlangs" (map toBabel otherlangs) + -- note: lang is used in some conditionals in the template, + -- so we need to set it if we have any babel/polyglossia: + maybe id (defField "lang" . renderLang) mblang + $ maybe id (defField "babel-lang" . toBabel) mblang + $ defField "babel-otherlangs" (map toBabel docLangs) $ defField "babel-newcommands" (concatMap (\(poly, babel) -> -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that @@ -274,20 +286,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) -- find polyglossia and babel names of languages used in the document - $ map (\l -> - let lng = splitBy (=='-') l - in (fst $ toPolyglossia lng, toBabel lng) - ) - docLangs ) - $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) - $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of - Just "rtl" -> True - _ -> False) + $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs + ) + $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang + $ defField "polyglossia-otherlangs" (map toPolyObj docLangs) + $ defField "latex-dir-rtl" + (getField "dir" context == Just ("rtl" :: String)) $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' -- | Convert Elements to LaTeX elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc @@ -443,11 +451,12 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do -> "\\leavevmode" <> linkAnchor' <> "%" _ -> linkAnchor' let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs let wrapDir = case lookup "dir" kvs of Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case lang of Just lng -> let (l, o) = toPolyglossiaEnv lng ops = if null o then "" @@ -647,23 +656,25 @@ blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = False} return hdr blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else do - contents <- (tableRowToLaTeX True aligns widths) heads - return ("\\toprule" $$ contents $$ "\\midrule") - let endhead = if all null heads - then empty - else text "\\endhead" - let endfirsthead = if all null heads - then empty - else text "\\endfirsthead" + let toHeaders hs = do contents <- (tableRowToLaTeX True aligns widths) hs + return ("\\toprule" $$ contents $$ "\\midrule") + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x captionText <- inlineListToLaTeX caption + firsthead <- if isEmpty captionText || all null heads + then return empty + else ($$ text "\\endfirsthead") <$> toHeaders heads + head' <- if all null heads + then return empty + -- avoid duplicate notes in head and firsthead: + else ($$ text "\\endhead") <$> + toHeaders (if isEmpty firsthead + then heads + else walk removeNote heads) let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText <> "\\tabularnewline" - $$ headers - $$ endfirsthead + else text "\\caption" <> + braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } @@ -671,9 +682,9 @@ blockToLaTeX (Table caption aligns widths heads rows) = do braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt + $$ firsthead $$ (if all null heads then "\\toprule" else empty) - $$ headers - $$ endhead + $$ head' $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" @@ -916,13 +927,14 @@ inlineToLaTeX :: PandocMonad m -> LW m Doc inlineToLaTeX (Span (id',classes,kvs) ils) = do linkAnchor <- hypertarget False id' empty + lang <- toLang $ lookup "lang" kvs let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ ["textnormal" | "csl-no-strong" `elem` classes || "csl-no-smallcaps" `elem` classes] ++ ["RL" | ("dir", "rtl") `elem` kvs] ++ ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + (case lang of + Just lng -> let (l, o) = toPolyglossia lng ops = if null o then "" else ("[" ++ o ++ "]") in ["text" ++ l ++ ops] Nothing -> []) @@ -1237,9 +1249,9 @@ mbBraced x = if not (all isAlphaNum x) -- Extract a key from divs and spans extract :: String -> Block -> [String] extract key (Div attr _) = lookKey key attr -extract key (Plain ils) = concatMap (extractInline key) ils -extract key (Para ils) = concatMap (extractInline key) ils -extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract key (Plain ils) = query (extractInline key) ils +extract key (Para ils) = query (extractInline key) ils +extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans @@ -1252,85 +1264,95 @@ lookKey :: String -> Attr -> [String] lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv :: Lang -> (String, String) toPolyglossiaEnv l = - case toPolyglossia $ (splitBy (=='-')) l of + case toPolyglossia l of ("arabic", o) -> ("Arabic", o) x -> x -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: [String] -> (String, String) -toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") -toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") -toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") -toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") -toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") -toPolyglossia ("de":"1901":_) = ("german", "spelling=old") -toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") -toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") -toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") -toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") -toPolyglossia ("de":_) = ("german", "") -toPolyglossia ("dsb":_) = ("lsorbian", "") -toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") -toPolyglossia ("en":"AU":_) = ("english", "variant=australian") -toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") -toPolyglossia ("en":"GB":_) = ("english", "variant=british") -toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") -toPolyglossia ("en":"UK":_) = ("english", "variant=british") -toPolyglossia ("en":"US":_) = ("english", "variant=american") -toPolyglossia ("grc":_) = ("greek", "variant=ancient") -toPolyglossia ("hsb":_) = ("usorbian", "") -toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") -toPolyglossia ("sl":_) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia :: Lang -> (String, String) +toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ "AT" vars) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ "CH" vars) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: [String] -> String -toBabel ("de":"1901":_) = "german" -toBabel ("de":"AT":"1901":_) = "austrian" -toBabel ("de":"AT":_) = "naustrian" -toBabel ("de":"CH":"1901":_) = "swissgerman" -toBabel ("de":"CH":_) = "nswissgerman" -toBabel ("de":_) = "ngerman" -toBabel ("dsb":_) = "lowersorbian" -toBabel ("el":"polyton":_) = "polutonikogreek" -toBabel ("en":"AU":_) = "australian" -toBabel ("en":"CA":_) = "canadian" -toBabel ("en":"GB":_) = "british" -toBabel ("en":"NZ":_) = "newzealand" -toBabel ("en":"UK":_) = "british" -toBabel ("en":"US":_) = "american" -toBabel ("fr":"CA":_) = "canadien" -toBabel ("fra":"aca":_) = "acadian" -toBabel ("grc":_) = "polutonikogreek" -toBabel ("hsb":_) = "uppersorbian" -toBabel ("la":"x":"classic":_) = "classiclatin" -toBabel ("sl":_) = "slovene" -toBabel x = commonFromBcp47 x +toBabel :: Lang -> String +toBabel (Lang "de" _ "AT" vars) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ "CH" vars) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ "AU" _) = "australian" +toBabel (Lang "en" _ "CA" _) = "canadian" +toBabel (Lang "en" _ "GB" _) = "british" +toBabel (Lang "en" _ "NZ" _) = "newzealand" +toBabel (Lang "en" _ "UK" _) = "british" +toBabel (Lang "en" _ "US" _) = "american" +toBabel (Lang "fr" _ "CA" _) = "canadien" +toBabel (Lang "fra" _ _ vars) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "sl" _ _ _) = "slovene" +toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: [String] -> String -commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazil" +commonFromBcp47 :: Lang -> String +commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. -commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" -commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" -commonFromBcp47 x = fromIso $ head x +commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" +commonFromBcp47 (Lang "zh" "Latn" _ vars) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 0fc6afbdc..4e756c419 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to groff man page format. -} module Text.Pandoc.Writers.Man ( writeMan) where import Control.Monad.Except (throwError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intercalate, intersperse, stripPrefix, sort) import qualified Data.Map as Map import Data.Maybe (fromMaybe) @@ -110,7 +110,7 @@ pandocToMan opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return man representation of notes. notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 3ac677943..1e0d8bde2 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -35,7 +35,7 @@ Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (chr, isPunctuation, isSpace, ord) import Data.Default import qualified Data.HashMap.Strict as H @@ -209,8 +209,8 @@ pandocToMarkdown opts (Pandoc meta blocks) = do Nothing -> empty let headerBlocks = filter isHeaderBlock blocks toc <- if writerTableOfContents opts - then tableOfContents opts headerBlocks - else return empty + then render' <$> tableOfContents opts headerBlocks + else return "" -- Strip off final 'references' header if markdown citations enabled let blocks' = if isEnabled Ext_citations opts then case reverse blocks of @@ -220,7 +220,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts let main = render' $ body <> notesAndRefs' - let context = defField "toc" (render' toc) + let context = -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + defField "toc" toc + $ defField "table-of-contents" toc $ defField "body" main $ (if isNullMeta meta then id @@ -228,7 +232,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do $ addVariablesToJSON opts metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return markdown representation of reference key table. refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc @@ -412,6 +416,9 @@ blockToMarkdown' opts (Plain inlines) = do '+':s:_ | not isPlain && isSpace s -> "\\" <> contents '*':s:_ | not isPlain && isSpace s -> "\\" <> contents '-':s:_ | not isPlain && isSpace s -> "\\" <> contents + '+':[] | not isPlain -> "\\" <> contents + '*':[] | not isPlain -> "\\" <> contents + '-':[] | not isPlain -> "\\" <> contents '|':_ | (isEnabled Ext_line_blocks opts || isEnabled Ext_pipe_tables opts) && isEnabled Ext_all_symbols_escapable opts @@ -433,8 +440,10 @@ blockToMarkdown' opts (LineBlock lns) = return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns blockToMarkdown' opts b@(RawBlock f str) - | f == "markdown" = return $ text str <> text "\n" - | f == "html" && isEnabled Ext_raw_html opts = do + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + = return $ text str <> text "\n" + | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do plain <- asks envPlain return $ if plain then empty @@ -1053,10 +1062,12 @@ inlineToMarkdown opts (Math DisplayMath str) = (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts il@(RawInline f str) = do plain <- asks envPlain - if not plain && - ( f == "markdown" || + if (plain && f == "plain") || (not plain && + ( f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || - (isEnabled Ext_raw_html opts && f == "html") ) + (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"]) + )) then return $ text str else do report $ InlineNotRendered il diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs index 104d3c20b..58252d60f 100644 --- a/src/Text/Pandoc/Writers/Math.hs +++ b/src/Text/Pandoc/Writers/Math.hs @@ -1,6 +1,8 @@ module Text.Pandoc.Writers.Math ( texMathToInlines , convertMath + , defaultMathJaxURL + , defaultKaTeXURL ) where @@ -47,3 +49,8 @@ convertMath writer mt str = do DisplayMath -> DisplayBlock InlineMath -> DisplayInline +defaultMathJaxURL :: String +defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/" + +defaultKaTeXURL :: String +defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index c70e5b786..58d1b0707 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -31,7 +31,7 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intercalate) import qualified Data.Set as Set import Data.Text (Text, pack) @@ -82,9 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do let main = body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata - return $ pack - $ case writerTemplate opts of - Nothing -> main + pack <$> case writerTemplate opts of + Nothing -> return main Just tpl -> renderTemplate' tpl context -- | Escape special characters for MediaWiki. diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index c5c3d9f5b..493da1545 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -52,7 +52,7 @@ import Text.Pandoc.Pretty import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char ( isLower, isUpper, toUpper ) import Text.TeXMath (writeEqn) import System.FilePath (takeExtension) @@ -125,7 +125,7 @@ pandocToMs opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Association list of characters to escape. msEscapes :: Map.Map Char String @@ -411,7 +411,8 @@ definitionListItemToMs opts (label, defs) = do let (first, rest) = case blocks of ((Para x):y) -> (Plain x,y) (x:y) -> (x,y) - [] -> error "blocks is null" + [] -> (Plain [], []) + -- should not happen rest' <- liftM vcat $ mapM (\item -> blockToMs opts item) rest first' <- blockToMs opts first diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 85e0b5467..0383d9d86 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -42,7 +42,7 @@ However, @\<literal style="html">@ tag is used for HTML raw blocks even though it is supported only in Emacs Muse. -} module Text.Pandoc.Writers.Muse (writeMuse) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Text (Text) import Data.List (intersperse, transpose, isInfixOf) import System.FilePath (takeExtension) @@ -97,11 +97,17 @@ pandocToMuse (Pandoc meta blocks) = do body <- blockListToMuse blocks notes <- liftM (reverse . stNotes) get >>= notesToMuse let main = render colwidth $ body $+$ notes - let context = defField "body" main - $ metadata + let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context + +-- | Convert list of Pandoc block elements to Muse +-- | without setting stTopLevel. +flatBlockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +flatBlockListToMuse blocks = cat <$> mapM blockToMuse blocks -- | Convert list of Pandoc block elements to Muse. blockListToMuse :: PandocMonad m @@ -112,11 +118,11 @@ blockListToMuse blocks = do modify $ \s -> s { stTopLevel = not $ stInsideBlock s , stInsideBlock = True } - contents <- mapM blockToMuse blocks + result <- flatBlockListToMuse blocks modify $ \s -> s { stTopLevel = stTopLevel oldState , stInsideBlock = stInsideBlock oldState } - return $ cat contents + return result -- | Convert Pandoc block element to Muse. blockToMuse :: PandocMonad m @@ -129,23 +135,23 @@ blockToMuse (Para inlines) = do blockToMuse (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] + (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToMuse ls contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) return $ blankline $$ "<verse>" $$ contents $$ "</verse>" <> blankline -blockToMuse (CodeBlock (_,_,_) str) = do +blockToMuse (CodeBlock (_,_,_) str) = return $ "<example>" $$ text str $$ "</example>" $$ blankline blockToMuse (RawBlock (Format format) str) = return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$ text str $$ "</literal>" $$ blankline blockToMuse (BlockQuote blocks) = do - contents <- blockListToMuse blocks + contents <- flatBlockListToMuse blocks return $ blankline <> "<quote>" - $$ flush contents -- flush to drop blanklines + $$ nest 0 contents -- nest 0 to remove trailing blank lines $$ "</quote>" <> blankline blockToMuse (OrderedList (start, style, _) items) = do @@ -154,11 +160,10 @@ blockToMuse (OrderedList (start, style, _) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToMuse item num) $ - zip markers' items + contents <- zipWithM orderedListItemToMuse markers' items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where orderedListItemToMuse :: PandocMonad m => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) @@ -170,7 +175,7 @@ blockToMuse (BulletList items) = do contents <- mapM bulletListItemToMuse items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where bulletListItemToMuse :: PandocMonad m => [Block] -> StateT WriterState m Doc @@ -179,7 +184,7 @@ blockToMuse (BulletList items) = do return $ hang 2 "- " contents blockToMuse (DefinitionList items) = do contents <- mapM definitionListItemToMuse items - return $ cr $$ (nest 1 $ vcat $ contents) $$ blankline + return $ cr $$ nest 1 (vcat contents) $$ blankline where definitionListItemToMuse :: PandocMonad m => ([Inline], [[Block]]) -> StateT WriterState m Doc @@ -218,8 +223,8 @@ blockToMuse (Table caption _ _ headers rows) = do -- FIXME: Muse doesn't allow blocks with height more than 1. let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where h = maximum (1 : map height blocks) - sep' = lblock (length sep) $ vcat (map text $ replicate h sep) - let makeRow sep = (" " <>) . (hpipeBlocks sep . zipWith lblock widthsInChars) + sep' = lblock (length sep) $ vcat (replicate h (text sep)) + let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars let head' = makeRow " || " headers' let rowSeparator = if noHeaders then " | " else " | " rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row @@ -236,9 +241,7 @@ blockToMuse Null = return empty notesToMuse :: PandocMonad m => Notes -> StateT WriterState m Doc -notesToMuse notes = - mapM (\(num, note) -> noteToMuse num note) (zip [1..] notes) >>= - return . vsep +notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes) -- | Return Muse representation of a note. noteToMuse :: PandocMonad m @@ -268,7 +271,7 @@ conditionalEscapeString s inlineListToMuse :: PandocMonad m => [Inline] -> StateT WriterState m Doc -inlineListToMuse lst = mapM inlineToMuse lst >>= return . hcat +inlineListToMuse lst = liftM hcat (mapM inlineToMuse lst) -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m @@ -316,7 +319,7 @@ inlineToMuse Space = return space inlineToMuse SoftBreak = do wrapText <- gets $ writerWrapText . stOptions return $ if wrapText == WrapPreserve then cr else space -inlineToMuse (Link _ txt (src, _)) = do +inlineToMuse (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> return $ "[[" <> text (escapeLink x) <> "]]" @@ -340,7 +343,7 @@ inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ "[" <> text ref <> "]" inlineToMuse (Span (_,name:_,_) inlines) = do contents <- inlineListToMuse inlines diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 1da051380..785891a9f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -31,8 +31,9 @@ Conversion of 'Pandoc' documents to ODT. module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip import Control.Monad.Except (catchError) -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import qualified Data.Text.Lazy as TL @@ -46,13 +47,14 @@ import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty import Text.Pandoc.Shared (stringify) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, toLang, Lang(..), renderLang) import Text.Pandoc.XML import Text.TeXMath -import Text.XML.Light.Output +import Text.XML.Light data ODTState = ODTState { stEntries :: [Entry] } @@ -78,6 +80,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta + lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of Just f -> liftM toArchive $ lift $ P.readFileLazy f @@ -132,18 +135,49 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] - $ ( inTagsSimple "office:meta" - $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title)) - ) + $ ( inTagsSimple "office:meta" $ + ( inTagsSimple "dc:title" + (text $ escapeStringForXML (stringify title)) + $$ + case lang of + Just l -> inTagsSimple "dc:language" + (text (escapeStringForXML (renderLang l))) + Nothing -> empty + ) ) ) -- make sure mimetype is first let mimetypeEntry = toEntry "mimetype" epochtime $ fromStringLazy "application/vnd.oasis.opendocument.text" - let archive'' = addEntryToArchive mimetypeEntry + archive'' <- updateStyleWithLang lang + $ addEntryToArchive mimetypeEntry $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' +updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive +updateStyleWithLang Nothing arch = return arch +updateStyleWithLang (Just lang) arch = do + epochtime <- floor `fmap` (lift P.getPOSIXTime) + return arch{ zEntries = [if eRelativePath e == "styles.xml" + then case parseXMLDoc + (toStringLazy (fromEntry e)) of + Nothing -> e + Just d -> + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else e + | e <- zEntries arch] } + +addLang :: Lang -> Element -> Element +addLang lang = everywhere' (mkT updateLangAttr) + where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) + = Attr n (langLanguage lang) + updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) + = Attr n (langRegion lang) + updateLangAttr x = x + -- | transform both Image and Math elements transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 4a0a317fa..52577ac17 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -60,9 +60,9 @@ writeOPML opts (Pandoc meta blocks) = do meta' main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements) let context = defField "body" main metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context writeHtmlInlines :: PandocMonad m => [Inline] -> m Text diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 58295684e..ed3dabb87 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) -import Control.Monad.State hiding (when) +import Control.Monad.State.Strict hiding (when) import Data.Char (chr) import Data.List (sortBy) import Data.Text (Text) @@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Pandoc.XML +import Text.Pandoc.BCP47 (parseBCP47, Lang(..)) import Text.Printf (printf) -- | Auxiliary function to convert Plain block to Para. @@ -168,8 +169,8 @@ inTextStyle d = do inTags False "style:style" [("style:name", styleName) ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (concatMap textStyleAttr (Set.toList at))) + $ selfClosingTag "style:text-properties" + (concatMap textStyleAttr (Set.toList at))) return $ inTags False "text:span" [("text:style-name",styleName)] d @@ -219,11 +220,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do let listStyles = map listStyle (stListStyles s) let automaticStyles = vcat $ reverse $ styles ++ listStyles let context = defField "body" body + $ defField "toc" (writerTableOfContents opts) $ defField "automatic-styles" (render' automaticStyles) $ metadata - return $ case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return body + Just tpl -> renderTemplate' tpl context withParagraphStyle :: PandocMonad m => WriterOptions -> String -> [Block] -> OD m Doc @@ -326,7 +328,8 @@ blockToOpenDocument o bs then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div _ xs <- bs = blocksToOpenDocument o xs + | Div attr xs <- bs = withLangFromAttr attr + (blocksToOpenDocument o xs) | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -444,7 +447,7 @@ inlineToOpenDocument o ils | writerWrapText o == WrapPreserve -> return $ preformatted "\n" | otherwise -> return $ space - Span _ xs -> inlinesToOpenDocument o xs + Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs) LineBreak -> return $ selfClosingTag "text:line-break" [] Str s -> return $ handleSpaces $ escapeStringForXML s Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l @@ -606,7 +609,14 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre +data TextStyle = Italic + | Bold + | Strike + | Sub + | Sup + | SmallC + | Pre + | Language Lang deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -624,4 +634,18 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] + | Language lang <- s + = [("fo:language" ,langLanguage lang) + ,("fo:country" ,langRegion lang)] | otherwise = [] + +withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a +withLangFromAttr (_,_,kvs) action = + case lookup "lang" kvs of + Nothing -> action + Just l -> do + case parseBCP47 l of + Right lang -> withTextStyle (Language lang) action + Left _ -> do + report $ InvalidLang l + action diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index e8f48da00..48f17c4fb 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -35,7 +35,7 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org (writeOrg) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isAlphaNum, toLower) import Data.Text (Text) import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) @@ -86,7 +86,7 @@ pandocToOrg (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return Org representation of notes. notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 59f6553e2..019c8335d 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace, toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Maybe (fromMaybe) @@ -108,7 +108,7 @@ pandocToRST (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context where normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 5c990f324..48d31c7bf 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -122,13 +122,18 @@ writeRTF options doc = do let context = defField "body" body $ defField "spacer" spacer $ (if writerTableOfContents options - then defField "toc" toc + then defField "table-of-contents" toc + -- for backwards compatibility, + -- we populate toc with the contents + -- of the toc rather than a boolean: + . defField "toc" toc else id) $ metadata - return $ T.pack - $ case writerTemplate options of + T.pack <$> + case writerTemplate options of Just tpl -> renderTemplate' tpl context - Nothing -> case reverse body of + Nothing -> return $ + case reverse body of ('\n':_) -> body _ -> body ++ "\n" diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 27d26c7d9..26070966e 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -85,7 +85,7 @@ writeTEI opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Convert an Element to TEI. elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc @@ -159,11 +159,13 @@ blockToTEI opts (Div (ident,_,_) [Para lst]) = do let attribs = [("id", ident) | not (null ident)] inTags False "p" attribs <$> inlinesToTEI opts lst blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs -blockToTEI _ (Header _ _ _) = return empty --- should not occur after hierarchicalize +blockToTEI _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- For TEI simple, text must be within containing block element, so --- we use plainToPara to ensure that Plain text ends up contained by --- something. +-- we use treat as Para to ensure that Plain text ends up contained by +-- something: blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- title beginning with fig: indicates that the image is a figure --blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 387e55290..549d4f3d9 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -32,7 +32,7 @@ Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (chr, ord) import Data.List (maximumBy, transpose) import Data.Ord (comparing) @@ -106,7 +106,7 @@ pandocToTexinfo options (Pandoc meta blocks) = do $ metadata case writerTemplate options of Nothing -> return body - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape things as needed for Texinfo. stringToTexinfo :: String -> String diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 091a5baca..acc9eaa0f 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to Textile markup. Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace) import Data.List (intercalate) import Data.Text (Text, pack) @@ -75,7 +75,7 @@ pandocToTextile opts (Pandoc meta blocks) = do let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context withUseTags :: PandocMonad m => TW m a -> TW m a withUseTags action = do diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 5ee239e59..ced02d4be 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -33,7 +33,7 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where import Control.Monad (zipWithM) -import Control.Monad.State (StateT, evalStateT, gets, modify) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) import qualified Data.Map as Map @@ -78,7 +78,7 @@ pandocToZimWiki opts (Pandoc meta blocks) = do $ defField "toc" (writerTableOfContents opts) $ metadata case writerTemplate opts of - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context Nothing -> return main -- | Escape special characters for ZimWiki. |