diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 1685 |
1 files changed, 265 insertions, 1420 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index cdccaa535..27c018e73 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,14 +1,10 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -22,50 +18,58 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, applyMacros, rawLaTeXInline, rawLaTeXBlock, - inlineCommand, - tokenize, - untokenize + inlineCommand ) where import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toUpper, chr) +import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr) import Data.Default -import Data.Functor (($>)) import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set -import qualified Data.Sequence as Seq import Data.Text (Text) import qualified Data.Text as T +import Skylighting (defaultSyntaxMap) import System.FilePath (addExtension, replaceExtension, takeExtension) -import Text.Pandoc.BCP47 (Lang (..), renderLang) -import Text.Pandoc.Builder +import Text.Collate.Lang (renderLang) +import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, - readFileFromDirs, report, setResourcePath, - setTranslations, translateTerm) + readFileFromDirs, report, + setResourcePath) import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) -import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) +import Text.Pandoc.Highlighting (languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) -import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), - ArgSpec (..), Tok (..), TokType (..)) +import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, - babelLangToBCP47) -import Text.Pandoc.Readers.LaTeX.SIunitx +import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) +import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, + inlineEnvironment, + mathDisplay, mathInline, + newtheorem, theoremstyle, proof, + theoremEnvironment) +import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) +import Text.Pandoc.Readers.LaTeX.Macro (macroDef) +import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, + enquoteCommands, + babelLangToBCP47, setDefaultLanguage) +import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) +import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, + nameCommands, charCommands, + accentCommands, + biblatexInlineCommands, + verbCommands, rawInlineOr, + listingsLanguage) import Text.Pandoc.Shared -import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk -import qualified Text.Pandoc.Builder as B -import qualified Data.Text.Normalize as Normalize -import Safe +import Data.List.NonEmpty (nonEmpty) -- for debugging: -- import Text.Pandoc.Extensions (getDefaultExtensions) @@ -73,16 +77,17 @@ import Safe -- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: PandocMonad m +readLaTeX :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readLaTeX opts ltx = do + let sources = toSources ltx parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" - (tokenize "source" (crFilter ltx)) + (tokenizeSources sources) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError ltx e + Left e -> throwError $ PandocParsecError sources e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -93,11 +98,7 @@ parseLaTeX = do let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] -#if MIN_VERSION_safe(0,3,18) - let bottomLevel = minimumBound 1 $ query headerLevel doc' -#else - let bottomLevel = minimumDef 1 $ query headerLevel doc' -#endif + let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc' let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils adjustHeaders _ x = x let (Pandoc _ bs') = @@ -132,11 +133,10 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenize "source" inp + toks <- getInputTokens snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> rawLaTeXParser toks True (do choice (map controlSeq @@ -163,14 +163,13 @@ beginOrEndCommand = try $ do (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenize "source" inp + toks <- getInputTokens raw <- snd <$> ( rawLaTeXParser toks True - (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) + (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) inlines <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -178,11 +177,10 @@ rawLaTeXInline = do finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenize "source" inp + toks <- getInputTokens fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -191,12 +189,6 @@ inlineCommand = do word :: PandocMonad m => LP m Inlines word = str . untoken <$> satisfyTok isWordTok -regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = str . untoken <$> satisfyTok isRegularSymbol - where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t - isRegularSymbol _ = False - isSpecial c = c `Set.member` specialChars - inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do ils <- grouped inline @@ -237,19 +229,6 @@ mkImage options (T.unpack -> src) = do _ -> return src return $ imageWith attr (T.pack src') "" alt -doxspace :: PandocMonad m => LP m Inlines -doxspace = - (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty - where startsWithLetter (Tok _ Word t) = - case T.uncons t of - Just (c, _) | isLetter c -> True - _ -> False - startsWithLetter _ = False - - -lit :: Text -> LP m Inlines -lit = pure . str - removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" @@ -296,23 +275,14 @@ quoted' f starter ender = do cs -> cs) else lit startchs -enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines -enquote starred mblang = do - skipopts - let lang = mblang >>= babelLangToBCP47 - let langspan = case lang of - Nothing -> id - Just l -> spanWith ("",[],[("lang", renderLang l)]) - quoteContext <- sQuoteContext <$> getState - if starred || quoteContext == InDoubleQuote - then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok - else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok +lit :: Text -> LP m Inlines +lit = pure . str blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks blockquote cvariant mblang = do citepar <- if cvariant then (\xs -> para (cite xs mempty)) - <$> cites NormalCitation False + <$> cites inline NormalCitation False else option mempty $ para <$> bracketed inline let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of @@ -323,224 +293,13 @@ blockquote cvariant mblang = do optional $ symbolIn (".:;?!" :: [Char]) -- currently ignored return $ blockQuote . langdiv $ (bs <> citepar) -doAcronym :: PandocMonad m => Text -> LP m Inlines -doAcronym form = do - acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), - ("acronym-form", "singular+" <> form)]) - $ str $ untokenize acro] - -doAcronymPlural :: PandocMonad m => Text -> LP m Inlines -doAcronymPlural form = do - acro <- braced - plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), - ("acronym-form", "plural+" <> form)]) $ - mconcat [str $ untokenize acro, plural]] - -doverb :: PandocMonad m => LP m Inlines -doverb = do - Tok _ Symbol t <- anySymbol - marker <- case T.uncons t of - Just (c, ts) | T.null ts -> return c - _ -> mzero - withVerbatimMode $ - code . untokenize <$> - manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) - -verbTok :: PandocMonad m => Char -> LP m Tok -verbTok stopchar = do - t@(Tok pos toktype txt) <- anyTok - case T.findIndex (== stopchar) txt of - Nothing -> return t - Just i -> do - let (t1, t2) = T.splitAt i txt - inp <- getInput - setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) - : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp - return $ Tok pos toktype t1 - -listingsLanguage :: [(Text, Text)] -> Maybe Text -listingsLanguage opts = - case lookup "language" opts of - Nothing -> Nothing - Just l -> fromListingsLanguage l `mplus` Just l - -dolstinline :: PandocMonad m => LP m Inlines -dolstinline = do - options <- option [] keyvals - let classes = maybeToList $ listingsLanguage options - doinlinecode classes - -domintinline :: PandocMonad m => LP m Inlines -domintinline = do - skipopts - cls <- untokenize <$> braced - doinlinecode [cls] - -doinlinecode :: PandocMonad m => [Text] -> LP m Inlines -doinlinecode classes = do - Tok _ Symbol t <- anySymbol - marker <- case T.uncons t of - Just (c, ts) | T.null ts -> return c - _ -> mzero - let stopchar = if marker == '{' then '}' else marker - withVerbatimMode $ - codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$> - manyTill (verbTok stopchar) (symbol stopchar) - -nlToSpace :: Char -> Char -nlToSpace '\n' = ' ' -nlToSpace x = x - -mathDisplay :: Text -> Inlines -mathDisplay = displayMath . trimMath - -mathInline :: Text -> Inlines -mathInline = math . trimMath - -dollarsMath :: PandocMonad m => LP m Inlines -dollarsMath = do - symbol '$' - display <- option False (True <$ symbol '$') - (do contents <- try $ untokenize <$> pDollarsMath 0 - if display - then mathDisplay contents <$ symbol '$' - else return $ mathInline contents) - <|> (guard display >> return (mathInline "")) - --- Int is number of embedded groupings -pDollarsMath :: PandocMonad m => Int -> LP m [Tok] -pDollarsMath n = do - tk@(Tok _ toktype t) <- anyTok - case toktype of - Symbol | t == "$" - , n == 0 -> return [] - | t == "\\" -> do - tk' <- anyTok - (tk :) . (tk' :) <$> pDollarsMath n - | t == "{" -> (tk :) <$> pDollarsMath (n+1) - | t == "}" -> - if n > 0 - then (tk :) <$> pDollarsMath (n-1) - else mzero - _ -> (tk :) <$> pDollarsMath n - --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - keys <- try $ bgroup *> manyTill citationLabel egroup - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m Text -citationLabel = do - sp - untokenize <$> - (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) - <* sp - <* optional (symbol ',') - <* sp) - where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char] - -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then do - multiprenote <- optionMaybe $ toList <$> paropt - multipostnote <- optionMaybe $ toList <$> paropt - let (pre, suf) = case (multiprenote, multipostnote) of - (Just s , Nothing) -> (mempty, s) - (Nothing , Just t) -> (mempty, t) - (Just s , Just t ) -> (s, t) - _ -> (mempty, mempty) - tempCits <- many1 simpleCiteArgs - case tempCits of - (k:ks) -> case ks of - (_:_) -> return $ (addMprenote pre k : init ks) ++ - [addMpostnote suf (last ks)] - _ -> return [addMprenote pre (addMpostnote suf k)] - _ -> return [[]] - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs - where mprenote (k:ks) = (k:ks) ++ [Space] - mprenote _ = mempty - mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) - mpostnote _ = mempty - addMprenote mpn (k:ks) = - let mpnfinal = case citationPrefix k of - (_:_) -> mprenote mpn - _ -> mpn - in addPrefix mpnfinal (k:ks) - addMprenote _ _ = [] - addMpostnote = addSuffix . mpostnote - -citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) - -handleCitationPart :: Inlines -> [Citation] -handleCitationPart ils = - let isCite Cite{} = True - isCite _ = False - (pref, rest) = break isCite (toList ils) - in case rest of - (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs - _ -> [] - -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - (cs, raw) <- - withRaw $ concat <$> do - bgroup - items <- mconcat <$> - many1 (notFollowedBy (symbol ';') >> inline) - `sepBy1` symbol ';' - egroup - return $ map handleCitationPart items - case cs of - [] -> mzero - (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" <> untokenize raw) - -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." - inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq guard $ name /= "begin" && name /= "end" && name /= "and" - star <- option "" ("*" <$ symbol '*' <* sp) + star <- if T.all isAlphaNum name + then option "" ("*" <$ symbol '*' <* sp) + else pure "" overlay <- option "" overlaySpecification let name' = name <> star <> overlay let names = ordNub [name', name] -- check non-starred as fallback @@ -551,28 +310,8 @@ inlineCommand' = try $ do <|> ignore rawcommand lookupListDefault raw names inlineCommands - tok :: PandocMonad m => LP m Inlines -tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' - where singleChar' = do - Tok _ _ t <- singleChar - return $ str t - -opt :: PandocMonad m => LP m Inlines -opt = do - toks <- try (sp *> bracketedToks <* sp) - -- now parse the toks as inlines - st <- getState - parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks - case parsed of - Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e - -paropt :: PandocMonad m => LP m Inlines -paropt = parenWrapped inline - -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" +tok = tokWith inline unescapeURL :: Text -> Text unescapeURL = T.concat . go . T.splitOn "\\" @@ -585,381 +324,109 @@ unescapeURL = T.concat . go . T.splitOn "\\" , isEscapable c = t | otherwise = "\\" <> t -mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe Text -> Text -> LP m a -mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" <> y <> "}\n" <> x <> - "\\end{" <> y <> "}" - -mathEnv :: PandocMonad m => Text -> LP m Text -mathEnv name = do - skipopts - optional blankline - res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ untokenize res - -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- untokenize <$> braced - M.findWithDefault mzero name inlineEnvironments - -inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) -inlineEnvironments = M.fromList [ - ("displaymath", mathEnvWith id Nothing "displaymath") - , ("math", math <$> mathEnv "math") - , ("equation", mathEnvWith id Nothing "equation") - , ("equation*", mathEnvWith id Nothing "equation*") - , ("gather", mathEnvWith id (Just "gathered") "gather") - , ("gather*", mathEnvWith id (Just "gathered") "gather*") - , ("multline", mathEnvWith id (Just "gathered") "multline") - , ("multline*", mathEnvWith id (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") - , ("align", mathEnvWith id (Just "aligned") "align") - , ("align*", mathEnvWith id (Just "aligned") "align*") - , ("alignat", mathEnvWith id (Just "aligned") "alignat") - , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") - , ("dmath", mathEnvWith id Nothing "dmath") - , ("dmath*", mathEnvWith id Nothing "dmath*") - , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") - , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") - , ("darray", mathEnvWith id (Just "aligned") "darray") - , ("darray*", mathEnvWith id (Just "aligned") "darray*") - ] - inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineCommands = M.union inlineLanguageCommands $ M.fromList - [ ("emph", extractSpaces emph <$> tok) - , ("textit", extractSpaces emph <$> tok) - , ("textsl", extractSpaces emph <$> tok) - , ("textsc", extractSpaces smallcaps <$> tok) - , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) - , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) - , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) - , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) - , ("texttt", ttfamily) - , ("sout", extractSpaces strikeout <$> tok) - , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer - , ("lq", return (str "‘")) - , ("rq", return (str "’")) - , ("textquoteleft", return (str "‘")) - , ("textquoteright", return (str "’")) - , ("textquotedblleft", return (str "“")) - , ("textquotedblright", return (str "”")) - , ("textsuperscript", extractSpaces superscript <$> tok) - , ("textsubscript", extractSpaces subscript <$> tok) - , ("textbackslash", lit "\\") - , ("backslash", lit "\\") - , ("slash", lit "/") - , ("textbf", extractSpaces strong <$> tok) - , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) - , ("underline", underline <$> tok) - , ("ldots", lit "…") - , ("vdots", lit "\8942") - , ("dots", lit "…") - , ("mdots", lit "…") - , ("sim", lit "~") - , ("sep", lit ",") - , ("label", rawInlineOr "label" dolabel) - , ("ref", rawInlineOr "ref" $ doref "ref") - , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty - , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty - , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty - , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) - , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) - , ("lettrine", rawInlineOr "lettrine" lettrine) - , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . untokenize <$> braced) - , ("texorpdfstring", const <$> tok <*> tok) - , ("P", lit "¶") - , ("S", lit "§") - , ("$", lit "$") - , ("%", lit "%") - , ("&", lit "&") - , ("#", lit "#") - , ("_", lit "_") - , ("{", lit "{") - , ("}", lit "}") - , ("qed", lit "\a0\x25FB") - -- old TeX commands - , ("em", extractSpaces emph <$> inlines) - , ("it", extractSpaces emph <$> inlines) - , ("sl", extractSpaces emph <$> inlines) - , ("bf", extractSpaces strong <$> inlines) - , ("tt", code . stringify . toList <$> inlines) - , ("rm", inlines) - , ("itshape", extractSpaces emph <$> inlines) - , ("slshape", extractSpaces emph <$> inlines) - , ("scshape", extractSpaces smallcaps <$> inlines) - , ("bfseries", extractSpaces strong <$> inlines) - , ("MakeUppercase", makeUppercase <$> tok) - , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase - , ("uppercase", makeUppercase <$> tok) - , ("MakeLowercase", makeLowercase <$> tok) - , ("MakeTextLowercase", makeLowercase <$> tok) - , ("lowercase", makeLowercase <$> tok) - , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", accent '\779' Nothing) -- hungarumlaut - , ("`", accent '\768' (Just '`')) -- grave - , ("'", accent '\769' (Just '\'')) -- acute - , ("^", accent '\770' (Just '^')) -- circ - , ("~", accent '\771' (Just '~')) -- tilde - , ("\"", accent '\776' Nothing) -- umlaut - , (".", accent '\775' Nothing) -- dot - , ("=", accent '\772' Nothing) -- macron - , ("|", accent '\781' Nothing) -- vertical line above - , ("b", accent '\817' Nothing) -- macron below - , ("c", accent '\807' Nothing) -- cedilla - , ("G", accent '\783' Nothing) -- doublegrave - , ("h", accent '\777' Nothing) -- hookabove - , ("d", accent '\803' Nothing) -- dotbelow - , ("f", accent '\785' Nothing) -- inverted breve - , ("r", accent '\778' Nothing) -- ringabove - , ("t", accent '\865' Nothing) -- double inverted breve - , ("U", accent '\782' Nothing) -- double vertical line above - , ("v", accent '\780' Nothing) -- hacek - , ("u", accent '\774' Nothing) -- breve - , ("k", accent '\808' Nothing) -- ogonek - , ("textogonekcentered", accent '\808' Nothing) -- ogonek - , ("i", lit "ı") -- dotless i - , ("j", lit "ȷ") -- dotless j - , ("newtie", accent '\785' Nothing) -- inverted breve - , ("textcircled", accent '\8413' Nothing) -- combining circle - , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState - guard $ not inTableCell - optional opt - spaces)) - , (",", lit "\8198") - , ("@", pure mempty) - , (" ", lit "\160") - , ("ps", pure $ str "PS." <> space) - , ("TeX", lit "TeX") - , ("LaTeX", lit "LaTeX") - , ("bar", lit "|") - , ("textless", lit "<") - , ("textgreater", lit ">") - , ("thanks", skipopts >> note <$> grouped block) - , ("footnote", skipopts >> note <$> grouped block) - , ("passthrough", tok) -- \passthrough macro used by latex writer - -- for listings - , ("verb", doverb) - , ("lstinline", dolstinline) - , ("mintinline", domintinline) - , ("Verb", doverb) - , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> - bracedUrl) - , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) - , ("href", do url <- bracedUrl - sp - link (unescapeURL $ untokenize url) "" <$> tok) - , ("includegraphics", do options <- option [] keyvals - src <- braced - mkImage options . unescapeURL . removeDoubleQuotes $ - untokenize src) - , ("enquote*", enquote True Nothing) - , ("enquote", enquote False Nothing) - -- foreignquote is supposed to use native quote marks - , ("foreignquote*", braced >>= enquote True . Just . untokenize) - , ("foreignquote", braced >>= enquote False . Just . untokenize) - -- hypehnquote uses regular quotes - , ("hyphenquote*", braced >>= enquote True . Just . untokenize) - , ("hyphenquote", braced >>= enquote False . Just . untokenize) - , ("figurename", doTerm Translations.Figure) - , ("prefacename", doTerm Translations.Preface) - , ("refname", doTerm Translations.References) - , ("bibname", doTerm Translations.Bibliography) - , ("chaptername", doTerm Translations.Chapter) - , ("partname", doTerm Translations.Part) - , ("contentsname", doTerm Translations.Contents) - , ("listfigurename", doTerm Translations.ListOfFigures) - , ("listtablename", doTerm Translations.ListOfTables) - , ("indexname", doTerm Translations.Index) - , ("abstractname", doTerm Translations.Abstract) - , ("tablename", doTerm Translations.Table) - , ("enclname", doTerm Translations.Encl) - , ("ccname", doTerm Translations.Cc) - , ("headtoname", doTerm Translations.To) - , ("pagename", doTerm Translations.Page) - , ("seename", doTerm Translations.See) - , ("seealsoname", doTerm Translations.SeeAlso) - , ("proofname", doTerm Translations.Proof) - , ("glossaryname", doTerm Translations.Glossary) - , ("lstlistingname", doTerm Translations.Listing) - , ("cite", citation "cite" NormalCitation False) - , ("Cite", citation "Cite" NormalCitation False) - , ("citep", citation "citep" NormalCitation False) - , ("citep*", citation "citep*" NormalCitation False) - , ("citeal", citation "citeal" NormalCitation False) - , ("citealp", citation "citealp" NormalCitation False) - , ("citealp*", citation "citealp*" NormalCitation False) - , ("autocite", citation "autocite" NormalCitation False) - , ("smartcite", citation "smartcite" NormalCitation False) - , ("footcite", inNote <$> citation "footcite" NormalCitation False) - , ("parencite", citation "parencite" NormalCitation False) - , ("supercite", citation "supercite" NormalCitation False) - , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) - , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) - , ("citeyear", citation "citeyear" SuppressAuthor False) - , ("autocite*", citation "autocite*" SuppressAuthor False) - , ("cite*", citation "cite*" SuppressAuthor False) - , ("parencite*", citation "parencite*" SuppressAuthor False) - , ("textcite", citation "textcite" AuthorInText False) - , ("citet", citation "citet" AuthorInText False) - , ("citet*", citation "citet*" AuthorInText False) - , ("citealt", citation "citealt" AuthorInText False) - , ("citealt*", citation "citealt*" AuthorInText False) - , ("textcites", citation "textcites" AuthorInText True) - , ("cites", citation "cites" NormalCitation True) - , ("autocites", citation "autocites" NormalCitation True) - , ("footcites", inNote <$> citation "footcites" NormalCitation True) - , ("parencites", citation "parencites" NormalCitation True) - , ("supercites", citation "supercites" NormalCitation True) - , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) - , ("Autocite", citation "Autocite" NormalCitation False) - , ("Smartcite", citation "Smartcite" NormalCitation False) - , ("Footcite", inNote <$> citation "Footcite" NormalCitation False) - , ("Parencite", citation "Parencite" NormalCitation False) - , ("Supercite", citation "Supercite" NormalCitation False) - , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) - , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) - , ("Citeyear", citation "Citeyear" SuppressAuthor False) - , ("Autocite*", citation "Autocite*" SuppressAuthor False) - , ("Cite*", citation "Cite*" SuppressAuthor False) - , ("Parencite*", citation "Parencite*" SuppressAuthor False) - , ("Textcite", citation "Textcite" AuthorInText False) - , ("Textcites", citation "Textcites" AuthorInText True) - , ("Cites", citation "Cites" NormalCitation True) - , ("Autocites", citation "Autocites" NormalCitation True) - , ("Footcites", inNote <$> citation "Footcites" NormalCitation True) - , ("Parencites", citation "Parencites" NormalCitation True) - , ("Supercites", citation "Supercites" NormalCitation True) - , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) - , ("citetext", complexNatbibCitation NormalCitation) - , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *> - complexNatbibCitation AuthorInText) - <|> citation "citeauthor" AuthorInText False) - , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= - addMeta "nocite")) - , ("hyperlink", hyperlink) - , ("hypertarget", hypertargetInline) - -- glossaries package - , ("gls", doAcronym "short") - , ("Gls", doAcronym "short") - , ("glsdesc", doAcronym "long") - , ("Glsdesc", doAcronym "long") - , ("GLSdesc", doAcronym "long") - , ("acrlong", doAcronym "long") - , ("Acrlong", doAcronym "long") - , ("acrfull", doAcronym "full") - , ("Acrfull", doAcronym "full") - , ("acrshort", doAcronym "abbrv") - , ("Acrshort", doAcronym "abbrv") - , ("glspl", doAcronymPlural "short") - , ("Glspl", doAcronymPlural "short") - , ("glsdescplural", doAcronymPlural "long") - , ("Glsdescplural", doAcronymPlural "long") - , ("GLSdescplural", doAcronymPlural "long") - -- acronyms package - , ("ac", doAcronym "short") - , ("acf", doAcronym "full") - , ("acs", doAcronym "abbrv") - , ("acl", doAcronym "long") - , ("acp", doAcronymPlural "short") - , ("acfp", doAcronymPlural "full") - , ("acsp", doAcronymPlural "abbrv") - , ("aclp", doAcronymPlural "long") - , ("Ac", doAcronym "short") - , ("Acf", doAcronym "full") - , ("Acs", doAcronym "abbrv") - , ("Acl", doAcronym "long") - , ("Acp", doAcronymPlural "short") - , ("Acfp", doAcronymPlural "full") - , ("Acsp", doAcronymPlural "abbrv") - , ("Aclp", doAcronymPlural "long") - -- siuntix - , ("si", skipopts *> dosi tok) - , ("SI", doSI tok) - , ("SIrange", doSIrange True tok) - , ("numrange", doSIrange False tok) - , ("numlist", doSInumlist) - , ("num", doSInum) - , ("ang", doSIang) - -- hyphenat - , ("bshyp", lit "\\\173") - , ("fshyp", lit "/\173") - , ("dothyp", lit ".\173") - , ("colonhyp", lit ":\173") - , ("hyp", lit "-") - , ("nohyphens", tok) - , ("textnhtt", ttfamily) - , ("nhttfamily", ttfamily) - -- LaTeX colors - , ("textcolor", coloredInline "color") - , ("colorbox", coloredInline "background-color") - -- fontawesome - , ("faCheck", lit "\10003") - , ("faClose", lit "\10007") - -- xspace - , ("xspace", doxspace) - -- etoolbox - , ("ifstrequal", ifstrequal) - , ("newtoggle", braced >>= newToggle) - , ("toggletrue", braced >>= setToggle True) - , ("togglefalse", braced >>= setToggle False) - , ("iftoggle", try $ ifToggle >> inline) - -- biblatex misc - , ("RN", romanNumeralUpper) - , ("Rn", romanNumeralLower) - -- babel - , ("foreignlanguage", foreignlanguage) - -- include - , ("input", rawInlineOr "input" $ include "input") - -- soul package - , ("ul", underline <$> tok) - -- ulem package - , ("uline", underline <$> tok) - -- plain tex stuff that should just be passed through as raw tex - , ("ifdim", ifdim) - -- stackengine - , ("addstackgap", skipopts *> tok) - ] - -accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines -accent combiningAccent fallBack = try $ do - ils <- tok - case toList ils of - (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ - -- try to normalize to the combined character: - Str (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent]) <> xs) : ys - [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - _ -> return ils - +inlineCommands = M.unions + [ accentCommands tok + , citationCommands inline + , siunitxCommands tok + , acronymCommands + , refCommands + , nameCommands + , verbCommands + , charCommands + , enquoteCommands tok + , inlineLanguageCommands tok + , biblatexInlineCommands tok + , rest ] + where + rest = M.fromList + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) + , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) + , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) + , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) + , ("texttt", ttfamily) + , ("sout", extractSpaces strikeout <$> tok) + , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) + , ("underline", underline <$> tok) + , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) + , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) + , ("lettrine", rawInlineOr "lettrine" lettrine) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) + , ("texorpdfstring", const <$> tok <*> tok) + -- old TeX commands + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) + , ("tt", code . stringify . toList <$> inlines) + , ("rm", inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) + , ("MakeUppercase", makeUppercase <$> tok) + , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase + , ("uppercase", makeUppercase <$> tok) + , ("MakeLowercase", makeLowercase <$> tok) + , ("MakeTextLowercase", makeLowercase <$> tok) + , ("lowercase", makeLowercase <$> tok) + , ("thanks", skipopts >> note <$> grouped block) + , ("footnote", skipopts >> note <$> grouped block) + , ("passthrough", tok) -- \passthrough macro used by latex writer + -- for listings + , ("includegraphics", do options <- option [] keyvals + src <- braced + mkImage options . + unescapeURL . + removeDoubleQuotes $ untokenize src) + -- hyperref + , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> + bracedUrl) + , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) + , ("href", do url <- bracedUrl + sp + link (unescapeURL $ untokenize url) "" <$> tok) + , ("hyperlink", hyperlink) + , ("hyperref", hyperref) + , ("hypertarget", hypertargetInline) + -- hyphenat + , ("nohyphens", tok) + , ("textnhtt", ttfamily) + , ("nhttfamily", ttfamily) + -- LaTeX colors + , ("textcolor", coloredInline "color") + , ("colorbox", coloredInline "background-color") + -- etoolbox + , ("ifstrequal", ifstrequal) + , ("newtoggle", braced >>= newToggle) + , ("toggletrue", braced >>= setToggle True) + , ("togglefalse", braced >>= setToggle False) + , ("iftoggle", try $ ifToggle >> inline) + -- include + , ("input", rawInlineOr "input" $ include "input") + -- soul package + , ("ul", underline <$> tok) + -- ulem package + , ("uline", underline <$> tok) + -- plain tex stuff that should just be passed through as raw tex + , ("ifdim", ifdim) + -- stackengine + , ("addstackgap", skipopts *> tok) + ] lettrine :: PandocMonad m => LP m Inlines lettrine = do - optional opt + optional rawopt x <- tok y <- tok return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y @@ -979,32 +446,18 @@ alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x -foreignlanguage :: PandocMonad m => LP m Inlines -foreignlanguage = do - babelLang <- untokenize <$> braced - case babelLangToBCP47 babelLang of - Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok - _ -> tok - -inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 - where - mk (polyglossia, bcp47Func) = - ("text" <> polyglossia, inlineLanguage bcp47Func) - -inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines -inlineLanguage bcp47Func = do - o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') - <$> rawopt - let lang = renderLang $ bcp47Func o - extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok - hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do src <- untokenize <$> braced lab <- tok return $ link ("#" <> src) "" lab +hyperref :: PandocMonad m => LP m Inlines +hyperref = try $ do + url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp)) + <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl) + link url "" <$> tok + hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do ref <- untokenize <$> braced @@ -1019,31 +472,6 @@ hypertargetInline = try $ do ils <- grouped inline return $ spanWith (ref, [], []) ils -romanNumeralUpper :: (PandocMonad m) => LP m Inlines -romanNumeralUpper = - str . toRomanNumeral <$> romanNumeralArg - -romanNumeralLower :: (PandocMonad m) => LP m Inlines -romanNumeralLower = - str . T.toLower . toRomanNumeral <$> romanNumeralArg - -romanNumeralArg :: (PandocMonad m) => LP m Int -romanNumeralArg = spaces *> (parser <|> inBraces) - where - inBraces = do - symbol '{' - spaces - res <- parser - spaces - symbol '}' - return res - parser = do - Tok _ Word s <- satisfyTok isWordTok - let (digits, rest) = T.span isDigit s - unless (T.null rest) $ - Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead digits - newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> @@ -1074,9 +502,6 @@ ifToggle = do report $ UndefinedToggle name' pos return () -doTerm :: PandocMonad m => Translations.Term -> LP m Inlines -doTerm term = str <$> translateTerm term - ifstrequal :: (PandocMonad m, Monoid a) => LP m a ifstrequal = do str1 <- tok @@ -1097,13 +522,6 @@ coloredInline stylename = do ttfamily :: PandocMonad m => LP m Inlines ttfamily = code . stringify . toList <$> tok -rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines -rawInlineOr name' fallback = do - parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions - if parseRaw - then rawInline "latex" <$> getRawCommand name' ("\\" <> name') - else fallback - processHBox :: Inlines -> Inlines processHBox = walk convert where @@ -1154,79 +572,90 @@ treatAsInline = Set.fromList , "pagebreak" ] -label :: PandocMonad m => LP m () -label = do - controlSeq "label" - t <- braced - updateState $ \st -> st{ sLastLabel = Just $ untokenize t } - -dolabel :: PandocMonad m => LP m Inlines -dolabel = do - v <- braced - let refstr = untokenize v - updateState $ \st -> - st{ sLastLabel = Just refstr } - return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ untokenize v - -doref :: PandocMonad m => Text -> LP m Inlines -doref cls = do - v <- braced - let refstr = untokenize v - return $ linkWith ("",[],[ ("reference-type", cls) - , ("reference", refstr)]) - ("#" <> refstr) - "" - (inBrackets $ str refstr) - lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v lookupListDefault d = (fromMaybe d .) . lookupList where lookupList l m = msum $ map (`M.lookup` m) l inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> word - <|> macroDef (rawInline "latex") - <|> inlineCommand' - <|> inlineEnvironment - <|> inlineGroup - <|> (symbol '-' *> - option (str "-") (symbol '-' *> - option (str "–") (str "—" <$ symbol '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) - <|> (str "”" <$ symbol '”') - <|> (str "’" <$ symbol '\'') - <|> (str "’" <$ symbol '’') - <|> (str "\160" <$ symbol '~') - <|> dollarsMath - <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . T.singleton <$> primEscape) - <|> regularSymbol - <|> (do res <- symbolIn "#^'`\"[]&" - pos <- getPosition - let s = untoken res - report $ ParsingUnescaped s pos - return $ str s) +inline = do + Tok pos toktype t <- lookAhead anyTok + let symbolAsString = str . untoken <$> anySymbol + let unescapedSymbolAsString = + do s <- untoken <$> anySymbol + report $ ParsingUnescaped s pos + return $ str s + case toktype of + Comment -> mempty <$ comment + Spaces -> space <$ whitespace + Newline -> softbreak <$ endline + Word -> word + Esc1 -> str . T.singleton <$> primEscape + Esc2 -> str . T.singleton <$> primEscape + Symbol -> + case t of + "-" -> symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-')) + "'" -> symbol '\'' *> + option (str "’") (str "”" <$ symbol '\'') + "~" -> str "\160" <$ symbol '~' + "`" -> doubleQuote <|> singleQuote <|> symbolAsString + "\"" -> doubleQuote <|> singleQuote <|> symbolAsString + "“" -> doubleQuote <|> symbolAsString + "‘" -> singleQuote <|> symbolAsString + "$" -> dollarsMath <|> unescapedSymbolAsString + "|" -> (guardEnabled Ext_literate_haskell *> + symbol '|' *> doLHSverb) <|> symbolAsString + "{" -> inlineGroup + "#" -> unescapedSymbolAsString + "&" -> unescapedSymbolAsString + "_" -> unescapedSymbolAsString + "^" -> unescapedSymbolAsString + "\\" -> mzero + "}" -> mzero + _ -> symbolAsString + CtrlSeq _ -> macroDef (rawInline "latex") + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + _ -> mzero inlines :: PandocMonad m => LP m Inlines inlines = mconcat <$> many inline +opt :: PandocMonad m => LP m Inlines +opt = do + toks <- try (sp *> bracketedToks <* sp) + -- now parse the toks as inlines + st <- getState + parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks + case parsed of + Right result -> return result + Left e -> throwError $ PandocParsecError (toSources toks) e + -- block elements: preamble :: PandocMonad m => LP m Blocks preamble = mconcat <$> many preambleBlock where preambleBlock = (mempty <$ spaces1) <|> macroDef (rawBlock "latex") + <|> filecontents <|> (mempty <$ blockCommand) <|> (mempty <$ braced) <|> (do notFollowedBy (begin_ "document") anyTok return mempty) +rule :: PandocMonad m => LP m Blocks +rule = do + skipopts + width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok + _thickness <- tok + -- 0-width rules are used to fix spacing issues: + case safeRead width of + Just (0 :: Double) -> return mempty + _ -> return horizontalRule + paragraph :: PandocMonad m => LP m Blocks paragraph = do x <- trimInlines . mconcat <$> many1 inline @@ -1264,6 +693,16 @@ include name = do mapM_ (insertIncluded defaultExt) fs return mempty +readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) +readFileFromTexinputs fp = do + fileContentsMap <- sFileContents <$> getState + case M.lookup (T.pack fp) fileContentsMap of + Just t -> return (Just t) + Nothing -> do + dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." + <$> lookupEnv "TEXINPUTS" + readFileFromDirs dirs fp + insertIncluded :: PandocMonad m => FilePath -> FilePath @@ -1273,13 +712,12 @@ insertIncluded defaultExtension f' = do ".tex" -> f' ".sty" -> f' _ -> addExtension f' defaultExtension - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" pos <- getPosition containers <- getIncludeFiles <$> getState when (T.pack f `elem` containers) $ throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos updateState $ addIncludeFile $ T.pack f - mbcontents <- readFileFromDirs dirs f + mbcontents <- readFileFromTexinputs f contents <- case mbcontents of Just s -> return s Nothing -> do @@ -1288,10 +726,6 @@ insertIncluded defaultExtension f' = do getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ sMeta = addMetaField field val $ sMeta st } - authors :: PandocMonad m => LP m () authors = try $ do bgroup @@ -1300,150 +734,6 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a -macroDef constructor = do - (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (untokenize s) <$ - guardDisabled Ext_latex_macros) - <|> return mempty - where commandDef = do - (name, macro') <- newcommand <|> letmacro <|> defmacro - guardDisabled Ext_latex_macros <|> - updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) - environmentDef = do - mbenv <- newenvironment - case mbenv of - Nothing -> return () - Just (name, macro1, macro2) -> - guardDisabled Ext_latex_macros <|> - do updateState $ \s -> s{ sMacros = - M.insert name macro1 (sMacros s) } - updateState $ \s -> s{ sMacros = - M.insert ("end" <> name) macro2 (sMacros s) } - -- @\newenvironment{envname}[n-args][default]{begin}{end}@ - -- is equivalent to - -- @\newcommand{\envname}[n-args][default]{begin}@ - -- @\newcommand{\endenvname}@ - -letmacro :: PandocMonad m => LP m (Text, Macro) -letmacro = do - controlSeq "let" - (name, contents) <- withVerbatimMode $ do - Tok _ (CtrlSeq name) _ <- anyControlSeq - optional $ symbol '=' - spaces - -- we first parse in verbatim mode, and then expand macros, - -- because we don't want \let\foo\bar to turn into - -- \let\foo hello if we have previously \def\bar{hello} - contents <- bracedOrToken - return (name, contents) - contents' <- doMacros' 0 contents - return (name, Macro ExpandWhenDefined [] Nothing contents') - -defmacro :: PandocMonad m => LP m (Text, Macro) -defmacro = try $ - -- we use withVerbatimMode, because macros are to be expanded - -- at point of use, not point of definition - withVerbatimMode $ do - controlSeq "def" - Tok _ (CtrlSeq name) _ <- anyControlSeq - argspecs <- many (argspecArg <|> argspecPattern) - contents <- bracedOrToken - return (name, Macro ExpandWhenUsed argspecs Nothing contents) - -argspecArg :: PandocMonad m => LP m ArgSpec -argspecArg = do - Tok _ (Arg i) _ <- satisfyTok isArgTok - return $ ArgNum i - -argspecPattern :: PandocMonad m => LP m ArgSpec -argspecPattern = - Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> - (toktype' == Symbol || toktype' == Word) && - (txt /= "{" && txt /= "\\" && txt /= "}"))) - -newcommand :: PandocMonad m => LP m (Text, Macro) -newcommand = do - pos <- getPosition - Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> - controlSeq "renewcommand" <|> - controlSeq "providecommand" <|> - controlSeq "DeclareMathOperator" <|> - controlSeq "DeclareRobustCommand" - withVerbatimMode $ do - Tok _ (CtrlSeq name) txt <- do - optional (symbol '*') - anyControlSeq <|> - (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') - spaces - numargs <- option 0 $ try bracketedNum - let argspecs = map ArgNum [1..numargs] - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - spaces - contents' <- bracedOrToken - let contents = - case mtype of - "DeclareMathOperator" -> - Tok pos (CtrlSeq "mathop") "\\mathop" - : Tok pos Symbol "{" - : Tok pos (CtrlSeq "mathrm") "\\mathrm" - : Tok pos Symbol "{" - : (contents' ++ - [ Tok pos Symbol "}", Tok pos Symbol "}" ]) - _ -> contents' - macros <- sMacros <$> getState - case M.lookup name macros of - Just macro - | mtype == "newcommand" -> do - report $ MacroAlreadyDefined txt pos - return (name, macro) - | mtype == "providecommand" -> return (name, macro) - _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) - -newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) -newenvironment = do - pos <- getPosition - Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> - controlSeq "renewenvironment" <|> - controlSeq "provideenvironment" - withVerbatimMode $ do - optional $ symbol '*' - spaces - name <- untokenize <$> braced - spaces - numargs <- option 0 $ try bracketedNum - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - let argspecs = map (\i -> ArgNum i) [1..numargs] - startcontents <- spaces >> bracedOrToken - endcontents <- spaces >> bracedOrToken - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ - | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined name pos - return Nothing - | mtype == "provideenvironment" -> - return Nothing - _ -> return $ Just (name, - Macro ExpandWhenUsed argspecs optarg startcontents, - Macro ExpandWhenUsed [] Nothing endcontents) - -bracketedNum :: PandocMonad m => LP m Int -bracketedNum = do - ds <- untokenize <$> bracketedToks - case safeRead ds of - Just i -> return i - _ -> return 0 - -setCaption :: PandocMonad m => LP m () -setCaption = try $ do - skipopts - ils <- tok - optional $ try $ spaces *> label - updateState $ \st -> st{ sCaption = Just ils } - looseItem :: PandocMonad m => LP m Blocks looseItem = do inListItem <- sInListItem <$> getState @@ -1457,10 +747,6 @@ epigraph = do p2 <- grouped block return $ divWith ("", ["epigraph"], []) (p1 <> p2) -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ sCaption = Nothing - , sLastLabel = Nothing } - section :: PandocMonad m => Attr -> Int -> LP m Blocks section (ident, classes, kvs) lvl = do skipopts @@ -1554,7 +840,7 @@ blockCommands = M.fromList , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) , ("signature", mempty <$ (skipopts *> authors)) , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - , ("newtheorem", newtheorem) + , ("newtheorem", newtheorem inline) , ("theoremstyle", theoremstyle) -- KOMA-Script metadata commands , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle")) @@ -1598,11 +884,11 @@ blockCommands = M.fromList -- , ("hrule", pure horizontalRule) , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok $> horizontalRule) + , ("rule", rule) , ("item", looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", para . trimInlines <$> (skipopts *> tok)) - , ("caption", mempty <$ setCaption) + , ("caption", mempty <$ setCaption inline) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= @@ -1640,7 +926,8 @@ blockCommands = M.fromList environments :: PandocMonad m => M.Map Text (LP m Blocks) -environments = M.fromList +environments = M.union (tableEnvironments blocks inline) $ + M.fromList [ ("document", env "document" blocks <* skipMany anyTok) , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) , ("sloppypar", env "sloppypar" blocks) @@ -1654,13 +941,6 @@ environments = M.fromList , ("flushright", divWith ("", ["flushright"], []) <$> env "flushright" blocks) , ("flushleft", divWith ("", ["flushleft"], []) <$> env "flushleft" blocks) , ("landscape", env "landscape" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - skipopts *> resetCaption *> blocks >>= addTableCaption) - , ("tabular*", env "tabular*" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) @@ -1683,7 +963,7 @@ environments = M.fromList , ("lilypond", rawVerbEnv "lilypond") , ("ly", rawVerbEnv "ly") -- amsthm - , ("proof", proof) + , ("proof", proof blocks opt) -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) @@ -1692,130 +972,29 @@ environments = M.fromList , ("iftoggle", try $ ifToggle >> block) ] -theoremstyle :: PandocMonad m => LP m Blocks -theoremstyle = do - stylename <- untokenize <$> braced - let mbstyle = case stylename of - "plain" -> Just PlainStyle - "definition" -> Just DefinitionStyle - "remark" -> Just RemarkStyle - _ -> Nothing - case mbstyle of - Nothing -> return () - Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty } - return mempty - -newtheorem :: PandocMonad m => LP m Blocks -newtheorem = do - number <- option True (False <$ symbol '*' <* sp) +filecontents :: PandocMonad m => LP m Blocks +filecontents = try $ do + controlSeq "begin" name <- untokenize <$> braced - sp - series <- option Nothing $ Just . untokenize <$> bracketedToks - sp - showName <- tok - sp - syncTo <- option Nothing $ Just . untokenize <$> bracketedToks - sty <- sLastTheoremStyle <$> getState - let spec = TheoremSpec { theoremName = showName - , theoremStyle = sty - , theoremSeries = series - , theoremSyncTo = syncTo - , theoremNumber = number - , theoremLastNum = DottedNum [0] } - tmap <- sTheoremMap <$> getState - updateState $ \s -> s{ sTheoremMap = - M.insert name spec tmap } + guard $ name == "filecontents" || name == "filecontents*" + skipopts + fp <- untokenize <$> braced + txt <- verbEnv name + updateState $ \st -> + st{ sFileContents = M.insert fp txt (sFileContents st) } return mempty -proof :: PandocMonad m => LP m Blocks -proof = do - title <- option (B.text "Proof") opt - bs <- env "proof" blocks - return $ - B.divWith ("", ["proof"], []) $ - addQed $ addTitle (B.emph (title <> ".")) bs - -addTitle :: Inlines -> Blocks -> Blocks -addTitle ils bs = - case B.toList bs of - (Para xs : rest) - -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest) - _ -> B.para ils <> bs - -addQed :: Blocks -> Blocks -addQed bs = - case Seq.viewr (B.unMany bs) of - s Seq.:> Para ils - -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign)) - _ -> bs <> B.para qedSign - where - qedSign = B.str "\xa0\x25FB" - environment :: PandocMonad m => LP m Blocks environment = try $ do controlSeq "begin" name <- untokenize <$> braced M.findWithDefault mzero name environments <|> - theoremEnvironment name <|> + theoremEnvironment blocks opt name <|> if M.member name (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else try (rawEnv name) <|> rawVerbEnv name -theoremEnvironment :: PandocMonad m => Text -> LP m Blocks -theoremEnvironment name = do - tmap <- sTheoremMap <$> getState - case M.lookup name tmap of - Nothing -> mzero - Just tspec -> do - optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt - mblabel <- option Nothing $ Just . untokenize <$> - try (spaces >> controlSeq "label" >> spaces >> braced) - bs <- env name blocks - number <- - if theoremNumber tspec - then do - let name' = fromMaybe name $ theoremSeries tspec - num <- getNextNumber - (maybe (DottedNum [0]) theoremLastNum . - M.lookup name' . sTheoremMap) - updateState $ \s -> - s{ sTheoremMap = - M.adjust - (\spec -> spec{ theoremLastNum = num }) - name' - (sTheoremMap s) - } - - case mblabel of - Just ident -> - updateState $ \s -> - s{ sLabels = M.insert ident - (B.toList $ - theoremName tspec <> "\160" <> - str (renderDottedNum num)) (sLabels s) } - Nothing -> return () - return $ space <> B.text (renderDottedNum num) - else return mempty - let titleEmph = case theoremStyle tspec of - PlainStyle -> B.strong - DefinitionStyle -> B.strong - RemarkStyle -> B.emph - let title = titleEmph (theoremName tspec <> number) - <> optTitle <> "." <> space - return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title - $ case theoremStyle tspec of - PlainStyle -> walk italicize bs - _ -> bs - -italicize :: Block -> Block -italicize (Para ils) = Para [Emph ils] -italicize (Plain ils) = Plain [Emph ils] -italicize x = x - -env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* end_ name - rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions @@ -1823,15 +1002,17 @@ rawEnv name = do rawOptions <- mconcat <$> many rawopt let beginCommand = "\\begin{" <> name <> "}" <> rawOptions pos1 <- getPosition - (bs, raw) <- withRaw $ env name blocks if parseRaw - then return $ rawBlock "latex" + then do + (_, raw) <- withRaw $ env name blocks + return $ rawBlock "latex" $ beginCommand <> untokenize raw else do + bs <- env name blocks report $ SkippedContent beginCommand pos1 pos2 <- getPosition report $ SkippedContent ("\\end{" <> name <> "}") pos2 - return bs + return $ divWith ("",[name],[]) bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do @@ -1890,8 +1071,7 @@ inputMinted = do pos <- getPosition attr <- mintedAttr f <- T.filter (/='"') . untokenize <$> braced - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs (T.unpack f) + mbCode <- readFileFromTexinputs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do @@ -1989,8 +1169,7 @@ inputListing = do pos <- getPosition options <- option [] keyvals f <- T.filter (/='"') . untokenize <$> braced - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs (T.unpack f) + mbCode <- readFileFromTexinputs (T.unpack f) codeLines <- case mbCode of Just s -> return $ T.lines s Nothing -> do @@ -1999,7 +1178,8 @@ inputListing = do let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) + Nothing -> (take 1 (languagesByExtension defaultSyntaxMap + (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ @@ -2065,358 +1245,23 @@ orderedList' = try $ do bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs --- tables - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces - optional opt - return () - -lbreak :: PandocMonad m => LP m Tok -lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") - <* skipopts <* spaces - -amp :: PandocMonad m => LP m Tok -amp = symbol '&' - --- Split a Word into individual Symbols (for parseAligns) -splitWordTok :: PandocMonad m => LP m () -splitWordTok = do - inp <- getInput - case inp of - (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest - _ -> return () - -parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] -parseAligns = try $ do - let maybeBar = skipMany - (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) - let cAlign = AlignCenter <$ symbol 'c' - let lAlign = AlignLeft <$ symbol 'l' - let rAlign = AlignRight <$ symbol 'r' - let parAlign = AlignLeft <$ symbol 'p' - -- aligns from tabularx - let xAlign = AlignLeft <$ symbol 'X' - let mAlign = AlignLeft <$ symbol 'm' - let bAlign = AlignLeft <$ symbol 'b' - let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign ) - let alignPrefix = symbol '>' >> braced - let alignSuffix = symbol '<' >> braced - let colWidth = try $ do - symbol '{' - ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") - spaces - symbol '}' - return $ safeRead ds - let alignSpec = do - pref <- option [] alignPrefix - spaces - al <- alignChar - width <- colWidth <|> option Nothing (do s <- untokenize <$> braced - pos <- getPosition - report $ SkippedContent s pos - return Nothing) - spaces - suff <- option [] alignSuffix - return (al, width, (pref, suff)) - let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro - symbol '*' - spaces - ds <- trim . untokenize <$> braced - spaces - spec <- braced - case safeRead ds of - Just n -> - getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" - bgroup - spaces - maybeBar - aligns' <- many $ try $ spaces >> optional starAlign >> - (alignSpec <* maybeBar) - spaces - egroup - spaces - return $ map toSpec aligns' - where - toColWidth (Just w) | w > 0 = ColWidth w - toColWidth _ = ColWidthDefault - toSpec (x, y, z) = (x, toColWidth y, z) - --- N.B. this parser returns a Row that may have erroneous empty cells --- in it. See the note above fixTableHead for details. -parseTableRow :: PandocMonad m - => Text -- ^ table environment name - -> [([Tok], [Tok])] -- ^ pref/suffixes - -> LP m Row -parseTableRow envname prefsufs = do - notFollowedBy (spaces *> end_ envname) - -- add prefixes and suffixes in token stream: - let celltoks (pref, suff) = do - prefpos <- getPosition - contents <- mconcat <$> - many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711 - <|> - snd <$> withRaw (inlineEnvironment <|> dollarsMath) - <|> - (do notFollowedBy - (() <$ amp <|> () <$ lbreak <|> end_ envname) - count 1 anyTok) ) - - suffpos <- getPosition - option [] (count 1 amp) - return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff - rawcells <- mapM celltoks prefsufs - oldInput <- getInput - cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells - setInput oldInput - spaces - return $ Row nullAttr cells - -parseTableCell :: PandocMonad m => LP m Cell -parseTableCell = do - spaces - updateState $ \st -> st{ sInTableCell = True } - cell' <- multicolumnCell - <|> multirowCell - <|> parseSimpleCell - <|> parseEmptyCell - updateState $ \st -> st{ sInTableCell = False } - spaces - return cell' - where - -- The parsing of empty cells is important in LaTeX, especially when dealing - -- with multirow/multicolumn. See #6603. - parseEmptyCell = spaces $> emptyCell - -cellAlignment :: PandocMonad m => LP m Alignment -cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') - where - alignment = do - c <- untoken <$> singleChar - return $ case c of - "l" -> AlignLeft - "r" -> AlignRight - "c" -> AlignCenter - "*" -> AlignDefault - _ -> AlignDefault - -plainify :: Blocks -> Blocks -plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - -multirowCell :: PandocMonad m => LP m Cell -multirowCell = controlSeq "multirow" >> do - -- Full prototype for \multirow macro is: - -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} - -- However, everything except `nrows` and `text` make - -- sense in the context of the Pandoc AST - _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position - nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related - _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning - content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' - return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content - -multicolumnCell :: PandocMonad m => LP m Cell -multicolumnCell = controlSeq "multicolumn" >> do - span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced - alignment <- symbol '{' *> cellAlignment <* symbol '}' - - let singleCell = do - content <- plainify <$> blocks - return $ cell alignment (RowSpan 1) (ColSpan span') content - - -- Two possible contents: either a \multirow cell, or content. - -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} - -- Note that a \multirow cell can be nested in a \multicolumn, - -- but not the other way around. See #6603 - let nestedCell = do - (Cell _ _ (RowSpan rs) _ bs) <- multirowCell - return $ cell - alignment - (RowSpan rs) - (ColSpan span') - (fromList bs) - - symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' - --- Parse a simple cell, i.e. not multirow/multicol -parseSimpleCell :: PandocMonad m => LP m Cell -parseSimpleCell = simpleCell <$> (plainify <$> blocks) - --- LaTeX tables are stored with empty cells underneath multirow cells --- denoting the grid spaces taken up by them. More specifically, if a --- cell spans m rows, then it will overwrite all the cells in the --- columns it spans for (m-1) rows underneath it, requiring padding --- cells in these places. These padding cells need to be removed for --- proper table reading. See #6603. --- --- These fixTable functions do not otherwise fix up malformed --- input tables: that is left to the table builder. -fixTableHead :: TableHead -> TableHead -fixTableHead (TableHead attr rows) = TableHead attr rows' - where - rows' = fixTableRows rows - -fixTableBody :: TableBody -> TableBody -fixTableBody (TableBody attr rhc th tb) - = TableBody attr rhc th' tb' - where - th' = fixTableRows th - tb' = fixTableRows tb - -fixTableRows :: [Row] -> [Row] -fixTableRows = fixTableRows' $ repeat Nothing - where - fixTableRows' oldHang (Row attr cells : rs) - = let (newHang, cells') = fixTableRow oldHang cells - rs' = fixTableRows' newHang rs - in Row attr cells' : rs' - fixTableRows' _ [] = [] - --- The overhang is represented as Just (relative cell dimensions) or --- Nothing for an empty grid space. -fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) -fixTableRow oldHang cells - -- If there's overhang, drop cells until their total width meets the - -- width of the occupied grid spaces (or we run out) - | (n, prefHang, restHang) <- splitHang oldHang - , n > 0 - = let cells' = dropToWidth getCellW n cells - (restHang', cells'') = fixTableRow restHang cells' - in (prefHang restHang', cells'') - -- Otherwise record the overhang of a pending cell and fix the rest - -- of the row - | c@(Cell _ _ h w _):cells' <- cells - = let h' = max 1 h - w' = max 1 w - oldHang' = dropToWidth getHangW w' oldHang - (newHang, cells'') = fixTableRow oldHang' cells' - in (toHang w' h' <> newHang, c : cells'') - | otherwise - = (oldHang, []) - where - getCellW (Cell _ _ _ w _) = w - getHangW = maybe 1 fst - getCS (ColSpan n) = n - - toHang c r - | r > 1 = [Just (c, r)] - | otherwise = replicate (getCS c) Nothing - - -- Take the prefix of the overhang list representing filled grid - -- spaces. Also return the remainder and the length of this prefix. - splitHang = splitHang' 0 id - - splitHang' !n l (Just (c, r):xs) - = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs - splitHang' n l xs = (n, l, xs) - - -- Drop list items until the total width of the dropped items - -- exceeds the passed width. - dropToWidth _ n l | n < 1 = l - dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs - dropToWidth _ _ [] = [] - -simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks -simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces >> tok) - skipopts - colspecs <- parseAligns - let (aligns, widths, prefsufs) = unzip3 colspecs - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - skipMany hline - spaces - header' <- option [] . try . fmap (:[]) $ - parseTableRow envname prefsufs <* lbreak <* many1 hline - spaces - rows <- sepEndBy (parseTableRow envname prefsufs) - (lbreak <* optional (skipMany hline)) - spaces - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - lookAhead $ controlSeq "end" -- make sure we're at end - let th = fixTableHead $ TableHead nullAttr header' - let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] - let tf = TableFoot nullAttr [] - return $ table emptyCaption (zip aligns widths) th tbs tf - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table attr c spec th tb tf) = do - st <- getState - let mblabel = sLastLabel st - capt <- case (sCaption st, mblabel) of - (Just ils, Nothing) -> return $ caption Nothing (plain ils) - (Just ils, Just lab) -> do - num <- getNextNumber sLastTableNum - setState - st{ sLastTableNum = num - , sLabels = M.insert lab - [Str (renderDottedNum num)] - (sLabels st) } - return $ caption Nothing (plain ils) -- add number?? - (Nothing, _) -> return c - let attr' = case (attr, mblabel) of - ((_,classes,kvs), Just ident) -> - (ident,classes,kvs) - _ -> attr - return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf - go x = return x - --- TODO: For now we add a Div to contain table attributes, since --- most writers don't do anything yet with attributes on Table. --- This can be removed when that changes. -addAttrDiv :: Attr -> Block -> Block -addAttrDiv ("",[],[]) b = b -addAttrDiv attr b = Div attr [b] - block :: PandocMonad m => LP m Blocks block = do - res <- (mempty <$ spaces1) - <|> environment - <|> macroDef (rawBlock "latex") - <|> blockCommand - <|> paragraph - <|> grouped block + Tok _ toktype _ <- lookAhead anyTok + res <- (case toktype of + Newline -> mempty <$ spaces1 + Spaces -> mempty <$ spaces1 + Comment -> mempty <$ spaces1 + Word -> paragraph + CtrlSeq "begin" -> environment + CtrlSeq _ -> macroDef (rawBlock "latex") + <|> blockCommand + _ -> mzero) + <|> paragraph + <|> grouped block trace (T.take 60 $ tshow $ B.toList res) return res blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block -setDefaultLanguage :: PandocMonad m => LP m Blocks -setDefaultLanguage = do - o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') - <$> rawopt - polylang <- untokenize <$> braced - case M.lookup polylang polyglossiaLangToBCP47 of - Nothing -> return mempty -- TODO mzero? warning? - Just langFunc -> do - let l = langFunc o - setTranslations l - updateState $ setMeta "lang" $ str (renderLang l) - return mempty |