diff options
| author | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:10:34 +0200 |
|---|---|---|
| committer | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:46:16 +0200 |
| commit | 48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch) | |
| tree | 1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Readers/LaTeX | |
| parent | 0c39509d9b6a58958228cebf5d643598e5c98950 (diff) | |
| parent | 46099e79defe662e541b12548200caf29063c1c6 (diff) | |
| download | pandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz | |
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX')
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Inline.hs | 397 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Lang.hs | 321 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Macro.hs | 184 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 273 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 223 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Table.hs | 379 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Types.hs | 17 |
9 files changed, 1973 insertions, 252 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs new file mode 100644 index 000000000..af97125c6 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -0,0 +1,210 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Citation + ( citationCommands + , cites + ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Control.Applicative ((<|>), optional, many) +import Control.Monad (mzero) +import Control.Monad.Trans (lift) +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(PandocParsecError)) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) + +citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +citationCommands inline = + let citation = citationWith inline + tok = spaces *> grouped inline + in M.fromList + [ ("cite", citation "cite" NormalCitation False) + , ("Cite", citation "Cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) + , ("footcite", inNote <$> citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", inNote <$> citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) + , ("Footcite", inNote <$> citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", inNote <$> citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation inline NormalCitation) + , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *> + complexNatbibCitation inline AuthorInText) + <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) + ] + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation] +simpleCiteArgs inline = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> manyTill citationLabel egroup + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + where + opt :: PandocMonad m => LP m Inlines + opt = do + toks <- try (sp *> bracketedToks <* sp) + -- now parse the toks as inlines + st <- getState + parsed <- lift $ + runParserT (mconcat <$> many inline) st "bracketed option" toks + case parsed of + Right result -> return result + Left e -> throwError $ PandocParsecError (toSources toks) e + + + +citationLabel :: PandocMonad m => LP m Text +citationLabel = do + sp + untokenize <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* sp + <* optional (symbol ',') + <* sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char] + +cites :: PandocMonad m + => LP m Inlines -> CitationMode -> Bool -> LP m [Citation] +cites inline mode multi = try $ do + let paropt = parenWrapped inline + cits <- if multi + then do + multiprenote <- optionMaybe $ toList <$> paropt + multipostnote <- optionMaybe $ toList <$> paropt + let (pre, suf) = case (multiprenote, multipostnote) of + (Just s , Nothing) -> (mempty, s) + (Nothing , Just t) -> (mempty, t) + (Just s , Just t ) -> (s, t) + _ -> (mempty, mempty) + tempCits <- many1 $ simpleCiteArgs inline + case tempCits of + (k:ks) -> case ks of + (_:_) -> return $ (addMprenote pre k : init ks) ++ + [addMpostnote suf (last ks)] + _ -> return [addMprenote pre (addMpostnote suf k)] + _ -> return [[]] + else count 1 $ simpleCiteArgs inline + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + where mprenote (k:ks) = (k:ks) ++ [Space] + mprenote _ = mempty + mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) + mpostnote _ = mempty + addMprenote mpn (k:ks) = + let mpnfinal = case citationPrefix k of + (_:_) -> mprenote mpn + _ -> mpn + in addPrefix mpnfinal (k:ks) + addMprenote _ _ = [] + addMpostnote = addSuffix . mpostnote + +citationWith :: PandocMonad m + => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines +citationWith inline name mode multi = do + (c,raw) <- withRaw $ cites inline mode multi + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m + => LP m Inlines -> CitationMode -> LP m Inlines +complexNatbibCitation inline mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` symbol ';' + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" <> untokenize raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs new file mode 100644 index 000000000..7b8bca4af --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -0,0 +1,397 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.LaTeX.Inline + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Readers.LaTeX.Inline + ( acronymCommands + , verbCommands + , charCommands + , accentCommands + , nameCommands + , biblatexInlineCommands + , refCommands + , rawInlineOr + , listingsLanguage + ) +where + +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder +import Text.Pandoc.Shared (toRomanNumeral, safeRead) +import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) +import Control.Applicative (optional, (<|>)) +import Control.Monad (guard, mzero, mplus, unless) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) +import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, + manyTill, getInput, setInput, incSourceColumn, + option, many1, try) +import Data.Char (isDigit) +import Text.Pandoc.Highlighting (fromListingsLanguage,) +import Data.Maybe (maybeToList, fromMaybe) +import Text.Pandoc.Options (ReaderOptions(..)) +import qualified Data.Text.Normalize as Normalize +import qualified Text.Pandoc.Translations as Translations + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' ("\\" <> name') + else fallback + +dolabel :: PandocMonad m => LP m Inlines +dolabel = do + v <- braced + let refstr = untokenize v + updateState $ \st -> + st{ sLastLabel = Just refstr } + return $ spanWith (refstr,[],[("label", refstr)]) + $ inBrackets $ str $ untokenize v + +doref :: PandocMonad m => Text -> LP m Inlines +doref cls = do + v <- braced + let refstr = untokenize v + return $ linkWith ("",[],[ ("reference-type", cls) + , ("reference", refstr)]) + ("#" <> refstr) + "" + (inBrackets $ str refstr) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +doTerm :: PandocMonad m => Translations.Term -> LP m Inlines +doTerm term = str <$> translateTerm term + +lit :: Text -> LP m Inlines +lit = pure . str + +doverb :: PandocMonad m => LP m Inlines +doverb = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + code . untokenize <$> + manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok pos toktype txt) <- anyTok + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) + : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp + return $ Tok pos toktype t1 + +listingsLanguage :: [(Text, Text)] -> Maybe Text +listingsLanguage opts = + case lookup "language" opts of + Nothing -> Nothing + Just l -> fromListingsLanguage l `mplus` Just l + +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do + options <- option [] keyvals + let classes = maybeToList $ listingsLanguage options + doinlinecode classes + +domintinline :: PandocMonad m => LP m Inlines +domintinline = do + skipopts + cls <- untokenize <$> braced + doinlinecode [cls] + +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines +doinlinecode classes = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$> + manyTill (verbTok stopchar) (symbol stopchar) + +nlToSpace :: Char -> Char +nlToSpace '\n' = ' ' +nlToSpace x = x + +romanNumeralUpper :: (PandocMonad m) => LP m Inlines +romanNumeralUpper = + str . toRomanNumeral <$> romanNumeralArg + +romanNumeralLower :: (PandocMonad m) => LP m Inlines +romanNumeralLower = + str . T.toLower . toRomanNumeral <$> romanNumeralArg + +romanNumeralArg :: (PandocMonad m) => LP m Int +romanNumeralArg = spaces *> (parser <|> inBraces) + where + inBraces = do + symbol '{' + spaces + res <- parser + spaces + symbol '}' + return res + parser = do + s <- untokenize <$> many1 (satisfyTok isWordTok) + let (digits, rest) = T.span isDigit s + unless (T.null rest) $ + Prelude.fail "Non-digits in argument to \\Rn or \\RN" + safeRead digits + +accentWith :: PandocMonad m + => LP m Inlines -> Char -> Maybe Char -> LP m Inlines +accentWith tok combiningAccent fallBack = try $ do + ils <- tok + case toList ils of + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ + -- try to normalize to the combined character: + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton + $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton + $ fromMaybe combiningAccent fallBack + _ -> return ils + + +verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) +verbCommands = M.fromList + [ ("verb", doverb) + , ("lstinline", dolstinline) + , ("mintinline", domintinline) + , ("Verb", doverb) + ] + +accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +accentCommands tok = + let accent = accentWith tok + in M.fromList + [ ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", accent '\779' Nothing) -- hungarumlaut + , ("`", accent '\768' (Just '`')) -- grave + , ("'", accent '\769' (Just '\'')) -- acute + , ("^", accent '\770' (Just '^')) -- circ + , ("~", accent '\771' (Just '~')) -- tilde + , ("\"", accent '\776' Nothing) -- umlaut + , (".", accent '\775' Nothing) -- dot + , ("=", accent '\772' Nothing) -- macron + , ("|", accent '\781' Nothing) -- vertical line above + , ("b", accent '\817' Nothing) -- macron below + , ("c", accent '\807' Nothing) -- cedilla + , ("G", accent '\783' Nothing) -- doublegrave + , ("h", accent '\777' Nothing) -- hookabove + , ("d", accent '\803' Nothing) -- dotbelow + , ("f", accent '\785' Nothing) -- inverted breve + , ("r", accent '\778' Nothing) -- ringabove + , ("t", accent '\865' Nothing) -- double inverted breve + , ("U", accent '\782' Nothing) -- double vertical line above + , ("v", accent '\780' Nothing) -- hacek + , ("u", accent '\774' Nothing) -- breve + , ("k", accent '\808' Nothing) -- ogonek + , ("textogonekcentered", accent '\808' Nothing) -- ogonek + , ("i", lit "ı") -- dotless i + , ("j", lit "ȷ") -- dotless j + , ("newtie", accent '\785' Nothing) -- inverted breve + , ("textcircled", accent '\8413' Nothing) -- combining circle + ] + +charCommands :: PandocMonad m => M.Map Text (LP m Inlines) +charCommands = M.fromList + [ ("ldots", lit "…") + , ("vdots", lit "\8942") + , ("dots", lit "…") + , ("mdots", lit "…") + , ("sim", lit "~") + , ("sep", lit ",") + , ("P", lit "¶") + , ("S", lit "§") + , ("$", lit "$") + , ("%", lit "%") + , ("&", lit "&") + , ("#", lit "#") + , ("_", lit "_") + , ("{", lit "{") + , ("}", lit "}") + , ("qed", lit "\a0\x25FB") + , ("lq", return (str "‘")) + , ("rq", return (str "’")) + , ("textquoteleft", return (str "‘")) + , ("textquoteright", return (str "’")) + , ("textquotedblleft", return (str "“")) + , ("textquotedblright", return (str "”")) + , ("/", pure mempty) -- italic correction + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional rawopt + spaces)) + , (",", lit "\8198") + , ("@", pure mempty) + , (" ", lit "\160") + , ("ps", pure $ str "PS." <> space) + , ("TeX", lit "TeX") + , ("LaTeX", lit "LaTeX") + , ("bar", lit "|") + , ("textless", lit "<") + , ("textgreater", lit ">") + , ("textbackslash", lit "\\") + , ("backslash", lit "\\") + , ("slash", lit "/") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") + -- hyphenat + , ("bshyp", lit "\\\173") + , ("fshyp", lit "/\173") + , ("dothyp", lit ".\173") + , ("colonhyp", lit ":\173") + , ("hyp", lit "-") + ] + +biblatexInlineCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +biblatexInlineCommands tok = M.fromList + -- biblatex misc + [ ("RN", romanNumeralUpper) + , ("Rn", romanNumeralLower) + , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) + , ("mkbibemph", spanWith nullAttr . emph <$> tok) + , ("mkbibitalic", spanWith nullAttr . emph <$> tok) + , ("mkbibbold", spanWith nullAttr . strong <$> tok) + , ("mkbibparens", + spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) + , ("mkbibbrackets", + spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) + , ("autocap", spanWith nullAttr <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("bibstring", + (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize + <$> braced) + , ("adddot", pure (str ".")) + , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) + , ("addabbrvspace", pure space) + , ("hyphen", pure (str "-")) + ] + +nameCommands :: PandocMonad m => M.Map Text (LP m Inlines) +nameCommands = M.fromList + [ ("figurename", doTerm Translations.Figure) + , ("prefacename", doTerm Translations.Preface) + , ("refname", doTerm Translations.References) + , ("bibname", doTerm Translations.Bibliography) + , ("chaptername", doTerm Translations.Chapter) + , ("partname", doTerm Translations.Part) + , ("contentsname", doTerm Translations.Contents) + , ("listfigurename", doTerm Translations.ListOfFigures) + , ("listtablename", doTerm Translations.ListOfTables) + , ("indexname", doTerm Translations.Index) + , ("abstractname", doTerm Translations.Abstract) + , ("tablename", doTerm Translations.Table) + , ("enclname", doTerm Translations.Encl) + , ("ccname", doTerm Translations.Cc) + , ("headtoname", doTerm Translations.To) + , ("pagename", doTerm Translations.Page) + , ("seename", doTerm Translations.See) + , ("seealsoname", doTerm Translations.SeeAlso) + , ("proofname", doTerm Translations.Proof) + , ("glossaryname", doTerm Translations.Glossary) + , ("lstlistingname", doTerm Translations.Listing) + ] + +refCommands :: PandocMonad m => M.Map Text (LP m Inlines) +refCommands = M.fromList + [ ("label", rawInlineOr "label" dolabel) + , ("ref", rawInlineOr "ref" $ doref "ref") + , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty + , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty + , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty + ] + +acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines) +acronymCommands = M.fromList + -- glossaries package + [ ("gls", doAcronym "short") + , ("Gls", doAcronym "short") + , ("glsdesc", doAcronym "long") + , ("Glsdesc", doAcronym "long") + , ("GLSdesc", doAcronym "long") + , ("acrlong", doAcronym "long") + , ("Acrlong", doAcronym "long") + , ("acrfull", doAcronym "full") + , ("Acrfull", doAcronym "full") + , ("acrshort", doAcronym "abbrv") + , ("Acrshort", doAcronym "abbrv") + , ("glspl", doAcronymPlural "short") + , ("Glspl", doAcronymPlural "short") + , ("glsdescplural", doAcronymPlural "long") + , ("Glsdescplural", doAcronymPlural "long") + , ("GLSdescplural", doAcronymPlural "long") + -- acronyms package + , ("ac", doAcronym "short") + , ("acf", doAcronym "full") + , ("acs", doAcronym "abbrv") + , ("acl", doAcronym "long") + , ("acp", doAcronymPlural "short") + , ("acfp", doAcronymPlural "full") + , ("acsp", doAcronymPlural "abbrv") + , ("aclp", doAcronymPlural "long") + , ("Ac", doAcronym "short") + , ("Acf", doAcronym "full") + , ("Acs", doAcronym "abbrv") + , ("Acl", doAcronym "long") + , ("Acp", doAcronymPlural "short") + , ("Acfp", doAcronymPlural "full") + , ("Acsp", doAcronymPlural "abbrv") + , ("Aclp", doAcronymPlural "long") + ] + +doAcronym :: PandocMonad m => Text -> LP m Inlines +doAcronym form = do + acro <- braced + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] + +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines +doAcronymPlural form = do + acro <- braced + let plural = str "s" + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] + + diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 814b2fe79..6a8327904 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang - Copyright : Copyright (C) 2018-2020 John MacFarlane + Copyright : Copyright (C) 2018-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to BCP47 'Lang'. -} module Text.Pandoc.Readers.LaTeX.Lang - ( polyglossiaLangToBCP47 + ( setDefaultLanguage + , polyglossiaLangToBCP47 , babelLangToBCP47 + , enquoteCommands + , inlineLanguageCommands ) where import qualified Data.Map as M +import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang(..)) +import Text.Pandoc.Shared (extractSpaces) +import Text.Collate.Lang (Lang(..), renderLang) +import Text.Pandoc.Class (PandocMonad(..), setTranslations) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), + withQuoteContext) +import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith, + singleQuoted, doubleQuoted) + +enquote :: PandocMonad m + => LP m Inlines + -> Bool -> Maybe Text -> LP m Inlines +enquote tok starred mblang = do + skipopts + let lang = mblang >>= babelLangToBCP47 + let langspan = case lang of + Nothing -> id + Just l -> spanWith ("",[],[("lang", renderLang l)]) + quoteContext <- sQuoteContext <$> getState + if starred || quoteContext == InDoubleQuote + then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok + else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok + +enquoteCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +enquoteCommands tok = M.fromList + [ ("enquote*", enquote tok True Nothing) + , ("enquote", enquote tok False Nothing) + -- foreignquote is supposed to use native quote marks + , ("foreignquote*", braced >>= enquote tok True . Just . untokenize) + , ("foreignquote", braced >>= enquote tok False . Just . untokenize) + -- hypehnquote uses regular quotes + , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize) + , ("hyphenquote", braced >>= enquote tok False . Just . untokenize) + ] + +foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines +foreignlanguage tok = do + babelLang <- untokenize <$> braced + case babelLangToBCP47 babelLang of + Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok + _ -> tok + +inlineLanguageCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +inlineLanguageCommands tok = + M.fromList $ + ("foreignlanguage", foreignlanguage tok) : + (mk <$> M.toList polyglossiaLangToBCP47) + where + mk (polyglossia, bcp47Func) = + ("text" <> polyglossia, inlineLanguage tok bcp47Func) + +inlineLanguage :: PandocMonad m + => LP m Inlines -> (Text -> Lang) -> LP m Inlines +inlineLanguage tok bcp47Func = do + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') + <$> rawopt + let lang = renderLang $ bcp47Func o + extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok + +setDefaultLanguage :: PandocMonad m => LP m Blocks +setDefaultLanguage = do + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') + <$> rawopt + polylang <- untokenize <$> braced + case M.lookup polylang polyglossiaLangToBCP47 of + Nothing -> return mempty -- TODO mzero? warning? + Just langFunc -> do + let l = langFunc o + setTranslations l + updateState $ setMeta "lang" $ str (renderLang l) + return mempty polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList [ ("arabic", \o -> case T.filter (/=' ') o of - "locale=algeria" -> Lang "ar" "" "DZ" [] - "locale=mashriq" -> Lang "ar" "" "SY" [] - "locale=libya" -> Lang "ar" "" "LY" [] - "locale=morocco" -> Lang "ar" "" "MA" [] - "locale=mauritania" -> Lang "ar" "" "MR" [] - "locale=tunisia" -> Lang "ar" "" "TN" [] - _ -> Lang "ar" "" "" []) + "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] [] + "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] [] + "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] [] + "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] [] + "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] [] + "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] [] + _ -> Lang "ar" Nothing (Just "") [] [] []) , ("german", \o -> case T.filter (/=' ') o of - "spelling=old" -> Lang "de" "" "DE" ["1901"] + "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] [] "variant=austrian,spelling=old" - -> Lang "de" "" "AT" ["1901"] - "variant=austrian" -> Lang "de" "" "AT" [] + -> Lang "de" Nothing (Just "AT") ["1901"] [] [] + "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] [] "variant=swiss,spelling=old" - -> Lang "de" "" "CH" ["1901"] - "variant=swiss" -> Lang "de" "" "CH" [] - _ -> Lang "de" "" "" []) - , ("lsorbian", \_ -> Lang "dsb" "" "" []) + -> Lang "de" Nothing (Just "CH") ["1901"] [] [] + "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] [] + _ -> Lang "de" Nothing Nothing [] [] []) + , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] []) , ("greek", \o -> case T.filter (/=' ') o of - "variant=poly" -> Lang "el" "" "polyton" [] - "variant=ancient" -> Lang "grc" "" "" [] - _ -> Lang "el" "" "" []) + "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] [] + "variant=ancient" -> Lang "grc" Nothing Nothing [] [] [] + _ -> Lang "el" Nothing Nothing [] [] []) , ("english", \o -> case T.filter (/=' ') o of - "variant=australian" -> Lang "en" "" "AU" [] - "variant=canadian" -> Lang "en" "" "CA" [] - "variant=british" -> Lang "en" "" "GB" [] - "variant=newzealand" -> Lang "en" "" "NZ" [] - "variant=american" -> Lang "en" "" "US" [] - _ -> Lang "en" "" "" []) - , ("usorbian", \_ -> Lang "hsb" "" "" []) + "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] [] + "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] [] + "variant=british" -> Lang "en" Nothing (Just "GB") [] [] [] + "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] [] + "variant=american" -> Lang "en" Nothing (Just "US") [] [] [] + _ -> Lang "en" Nothing (Just "") [] [] []) + , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] []) , ("latin", \o -> case T.filter (/=' ') o of - "variant=classic" -> Lang "la" "" "" ["x-classic"] - _ -> Lang "la" "" "" []) - , ("slovenian", \_ -> Lang "sl" "" "" []) - , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) - , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) - , ("afrikaans", \_ -> Lang "af" "" "" []) - , ("amharic", \_ -> Lang "am" "" "" []) - , ("assamese", \_ -> Lang "as" "" "" []) - , ("asturian", \_ -> Lang "ast" "" "" []) - , ("bulgarian", \_ -> Lang "bg" "" "" []) - , ("bengali", \_ -> Lang "bn" "" "" []) - , ("tibetan", \_ -> Lang "bo" "" "" []) - , ("breton", \_ -> Lang "br" "" "" []) - , ("catalan", \_ -> Lang "ca" "" "" []) - , ("welsh", \_ -> Lang "cy" "" "" []) - , ("czech", \_ -> Lang "cs" "" "" []) - , ("coptic", \_ -> Lang "cop" "" "" []) - , ("danish", \_ -> Lang "da" "" "" []) - , ("divehi", \_ -> Lang "dv" "" "" []) - , ("esperanto", \_ -> Lang "eo" "" "" []) - , ("spanish", \_ -> Lang "es" "" "" []) - , ("estonian", \_ -> Lang "et" "" "" []) - , ("basque", \_ -> Lang "eu" "" "" []) - , ("farsi", \_ -> Lang "fa" "" "" []) - , ("finnish", \_ -> Lang "fi" "" "" []) - , ("french", \_ -> Lang "fr" "" "" []) - , ("friulan", \_ -> Lang "fur" "" "" []) - , ("irish", \_ -> Lang "ga" "" "" []) - , ("scottish", \_ -> Lang "gd" "" "" []) - , ("ethiopic", \_ -> Lang "gez" "" "" []) - , ("galician", \_ -> Lang "gl" "" "" []) - , ("hebrew", \_ -> Lang "he" "" "" []) - , ("hindi", \_ -> Lang "hi" "" "" []) - , ("croatian", \_ -> Lang "hr" "" "" []) - , ("magyar", \_ -> Lang "hu" "" "" []) - , ("armenian", \_ -> Lang "hy" "" "" []) - , ("interlingua", \_ -> Lang "ia" "" "" []) - , ("indonesian", \_ -> Lang "id" "" "" []) - , ("icelandic", \_ -> Lang "is" "" "" []) - , ("italian", \_ -> Lang "it" "" "" []) - , ("japanese", \_ -> Lang "jp" "" "" []) - , ("khmer", \_ -> Lang "km" "" "" []) - , ("kurmanji", \_ -> Lang "kmr" "" "" []) - , ("kannada", \_ -> Lang "kn" "" "" []) - , ("korean", \_ -> Lang "ko" "" "" []) - , ("lao", \_ -> Lang "lo" "" "" []) - , ("lithuanian", \_ -> Lang "lt" "" "" []) - , ("latvian", \_ -> Lang "lv" "" "" []) - , ("malayalam", \_ -> Lang "ml" "" "" []) - , ("mongolian", \_ -> Lang "mn" "" "" []) - , ("marathi", \_ -> Lang "mr" "" "" []) - , ("dutch", \_ -> Lang "nl" "" "" []) - , ("nynorsk", \_ -> Lang "nn" "" "" []) - , ("norsk", \_ -> Lang "no" "" "" []) - , ("nko", \_ -> Lang "nqo" "" "" []) - , ("occitan", \_ -> Lang "oc" "" "" []) - , ("panjabi", \_ -> Lang "pa" "" "" []) - , ("polish", \_ -> Lang "pl" "" "" []) - , ("piedmontese", \_ -> Lang "pms" "" "" []) - , ("portuguese", \_ -> Lang "pt" "" "" []) - , ("romansh", \_ -> Lang "rm" "" "" []) - , ("romanian", \_ -> Lang "ro" "" "" []) - , ("russian", \_ -> Lang "ru" "" "" []) - , ("sanskrit", \_ -> Lang "sa" "" "" []) - , ("samin", \_ -> Lang "se" "" "" []) - , ("slovak", \_ -> Lang "sk" "" "" []) - , ("albanian", \_ -> Lang "sq" "" "" []) - , ("serbian", \_ -> Lang "sr" "" "" []) - , ("swedish", \_ -> Lang "sv" "" "" []) - , ("syriac", \_ -> Lang "syr" "" "" []) - , ("tamil", \_ -> Lang "ta" "" "" []) - , ("telugu", \_ -> Lang "te" "" "" []) - , ("thai", \_ -> Lang "th" "" "" []) - , ("turkmen", \_ -> Lang "tk" "" "" []) - , ("turkish", \_ -> Lang "tr" "" "" []) - , ("ukrainian", \_ -> Lang "uk" "" "" []) - , ("urdu", \_ -> Lang "ur" "" "" []) - , ("vietnamese", \_ -> Lang "vi" "" "" []) + "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] [] + _ -> Lang "la" Nothing Nothing [] [] []) + , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] []) + , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] []) + , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] []) + , ("afrikaans", \_ -> simpleLang "af") + , ("amharic", \_ -> simpleLang "am") + , ("assamese", \_ -> simpleLang "as") + , ("asturian", \_ -> simpleLang "ast") + , ("bulgarian", \_ -> simpleLang "bg") + , ("bengali", \_ -> simpleLang "bn") + , ("tibetan", \_ -> simpleLang "bo") + , ("breton", \_ -> simpleLang "br") + , ("catalan", \_ -> simpleLang "ca") + , ("welsh", \_ -> simpleLang "cy") + , ("czech", \_ -> simpleLang "cs") + , ("coptic", \_ -> simpleLang "cop") + , ("danish", \_ -> simpleLang "da") + , ("divehi", \_ -> simpleLang "dv") + , ("esperanto", \_ -> simpleLang "eo") + , ("spanish", \_ -> simpleLang "es") + , ("estonian", \_ -> simpleLang "et") + , ("basque", \_ -> simpleLang "eu") + , ("farsi", \_ -> simpleLang "fa") + , ("finnish", \_ -> simpleLang "fi") + , ("french", \_ -> simpleLang "fr") + , ("friulan", \_ -> simpleLang "fur") + , ("irish", \_ -> simpleLang "ga") + , ("scottish", \_ -> simpleLang "gd") + , ("ethiopic", \_ -> simpleLang "gez") + , ("galician", \_ -> simpleLang "gl") + , ("hebrew", \_ -> simpleLang "he") + , ("hindi", \_ -> simpleLang "hi") + , ("croatian", \_ -> simpleLang "hr") + , ("magyar", \_ -> simpleLang "hu") + , ("armenian", \_ -> simpleLang "hy") + , ("interlingua", \_ -> simpleLang "ia") + , ("indonesian", \_ -> simpleLang "id") + , ("icelandic", \_ -> simpleLang "is") + , ("italian", \_ -> simpleLang "it") + , ("japanese", \_ -> simpleLang "jp") + , ("khmer", \_ -> simpleLang "km") + , ("kurmanji", \_ -> simpleLang "kmr") + , ("kannada", \_ -> simpleLang "kn") + , ("korean", \_ -> simpleLang "ko") + , ("lao", \_ -> simpleLang "lo") + , ("lithuanian", \_ -> simpleLang "lt") + , ("latvian", \_ -> simpleLang "lv") + , ("malayalam", \_ -> simpleLang "ml") + , ("mongolian", \_ -> simpleLang "mn") + , ("marathi", \_ -> simpleLang "mr") + , ("dutch", \_ -> simpleLang "nl") + , ("nynorsk", \_ -> simpleLang "nn") + , ("norsk", \_ -> simpleLang "no") + , ("nko", \_ -> simpleLang "nqo") + , ("occitan", \_ -> simpleLang "oc") + , ("panjabi", \_ -> simpleLang "pa") + , ("polish", \_ -> simpleLang "pl") + , ("piedmontese", \_ -> simpleLang "pms") + , ("portuguese", \_ -> simpleLang "pt") + , ("romansh", \_ -> simpleLang "rm") + , ("romanian", \_ -> simpleLang "ro") + , ("russian", \_ -> simpleLang "ru") + , ("sanskrit", \_ -> simpleLang "sa") + , ("samin", \_ -> simpleLang "se") + , ("slovak", \_ -> simpleLang "sk") + , ("albanian", \_ -> simpleLang "sq") + , ("serbian", \_ -> simpleLang "sr") + , ("swedish", \_ -> simpleLang "sv") + , ("syriac", \_ -> simpleLang "syr") + , ("tamil", \_ -> simpleLang "ta") + , ("telugu", \_ -> simpleLang "te") + , ("thai", \_ -> simpleLang "th") + , ("turkmen", \_ -> simpleLang "tk") + , ("turkish", \_ -> simpleLang "tr") + , ("ukrainian", \_ -> simpleLang "uk") + , ("urdu", \_ -> simpleLang "ur") + , ("vietnamese", \_ -> simpleLang "vi") ] +simpleLang :: Text -> Lang +simpleLang l = Lang l Nothing Nothing [] [] [] + babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of - "austrian" -> Just $ Lang "de" "" "AT" ["1901"] - "naustrian" -> Just $ Lang "de" "" "AT" [] - "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] - "nswissgerman" -> Just $ Lang "de" "" "CH" [] - "german" -> Just $ Lang "de" "" "DE" ["1901"] - "ngerman" -> Just $ Lang "de" "" "DE" [] - "lowersorbian" -> Just $ Lang "dsb" "" "" [] - "uppersorbian" -> Just $ Lang "hsb" "" "" [] - "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] - "slovene" -> Just $ Lang "sl" "" "" [] - "australian" -> Just $ Lang "en" "" "AU" [] - "canadian" -> Just $ Lang "en" "" "CA" [] - "british" -> Just $ Lang "en" "" "GB" [] - "newzealand" -> Just $ Lang "en" "" "NZ" [] - "american" -> Just $ Lang "en" "" "US" [] - "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] [] + "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] [] + "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] [] + "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] [] + "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] [] + "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] [] + "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] [] + "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] [] + "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] [] + "slovene" -> Just $ simpleLang "sl" + "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] [] + "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] [] + "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] [] + "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] [] + "american" -> Just $ Lang "en" Nothing (Just "US") [] [] [] + "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] [] _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs new file mode 100644 index 000000000..5495a8e74 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Macro + ( macroDef + ) +where +import Text.Pandoc.Extensions (Extension(..)) +import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined)) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T + +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a +macroDef constructor = do + (_, s) <- withRaw (commandDef <|> environmentDef) + (constructor (untokenize s) <$ + guardDisabled Ext_latex_macros) + <|> return mempty + where commandDef = do + nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif + guardDisabled Ext_latex_macros <|> + mapM_ (\(name, macro') -> + updateState (\s -> s{ sMacros = M.insert name macro' + (sMacros s) })) nameMacroPairs + environmentDef = do + mbenv <- newenvironment + case mbenv of + Nothing -> return () + Just (name, macro1, macro2) -> + guardDisabled Ext_latex_macros <|> + do updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +letmacro :: PandocMonad m => LP m [(Text, Macro)] +letmacro = do + controlSeq "let" + (name, contents) <- withVerbatimMode $ do + Tok _ (CtrlSeq name) _ <- anyControlSeq + optional $ symbol '=' + spaces + -- we first parse in verbatim mode, and then expand macros, + -- because we don't want \let\foo\bar to turn into + -- \let\foo hello if we have previously \def\bar{hello} + contents <- bracedOrToken + return (name, contents) + contents' <- doMacros' 0 contents + return [(name, Macro ExpandWhenDefined [] Nothing contents')] + +defmacro :: PandocMonad m => LP m [(Text, Macro)] +defmacro = do + -- we use withVerbatimMode, because macros are to be expanded + -- at point of use, not point of definition + controlSeq "def" + withVerbatimMode $ do + Tok _ (CtrlSeq name) _ <- anyControlSeq + argspecs <- many (argspecArg <|> argspecPattern) + contents <- bracedOrToken + return [(name, Macro ExpandWhenUsed argspecs Nothing contents)] + +-- \newif\iffoo' defines: +-- \iffoo to be \iffalse +-- \footrue to be a command that defines \iffoo to be \iftrue +-- \foofalse to be a command that defines \iffoo to be \iffalse +newif :: PandocMonad m => LP m [(Text, Macro)] +newif = do + controlSeq "newif" + withVerbatimMode $ do + Tok pos (CtrlSeq name) _ <- anyControlSeq + -- \def\iffoo\iffalse + -- \def\footrue{\def\iffoo\iftrue} + -- \def\foofalse{\def\iffoo\iffalse} + let base = T.drop 2 name + return [ (name, Macro ExpandWhenUsed [] Nothing + [Tok pos (CtrlSeq "iffalse") "\\iffalse"]) + , (base <> "true", + Macro ExpandWhenUsed [] Nothing + [ Tok pos (CtrlSeq "def") "\\def" + , Tok pos (CtrlSeq name) ("\\" <> name) + , Tok pos (CtrlSeq "iftrue") "\\iftrue" + ]) + , (base <> "false", + Macro ExpandWhenUsed [] Nothing + [ Tok pos (CtrlSeq "def") "\\def" + , Tok pos (CtrlSeq name) ("\\" <> name) + , Tok pos (CtrlSeq "iffalse") "\\iffalse" + ]) + ] + +argspecArg :: PandocMonad m => LP m ArgSpec +argspecArg = do + Tok _ (Arg i) _ <- satisfyTok isArgTok + return $ ArgNum i + +argspecPattern :: PandocMonad m => LP m ArgSpec +argspecPattern = + Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> + (toktype' == Symbol || toktype' == Word) && + (txt /= "{" && txt /= "\\" && txt /= "}"))) + +newcommand :: PandocMonad m => LP m [(Text, Macro)] +newcommand = do + Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" <|> + controlSeq "DeclareMathOperator" <|> + controlSeq "DeclareRobustCommand" + withVerbatimMode $ do + Tok _ (CtrlSeq name) txt <- do + optional (symbol '*') + anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + let argspecs = map ArgNum [1..numargs] + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents' <- bracedOrToken + let contents = + case mtype of + "DeclareMathOperator" -> + Tok pos (CtrlSeq "mathop") "\\mathop" + : Tok pos Symbol "{" + : Tok pos (CtrlSeq "mathrm") "\\mathrm" + : Tok pos Symbol "{" + : (contents' ++ + [ Tok pos Symbol "}", Tok pos Symbol "}" ]) + _ -> contents' + macros <- sMacros <$> getState + case M.lookup name macros of + Just macro + | mtype == "newcommand" -> do + report $ MacroAlreadyDefined txt pos + return [(name, macro)] + | mtype == "providecommand" -> return [(name, macro)] + _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)] + +newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + withVerbatimMode $ do + optional $ symbol '*' + spaces + name <- untokenize <$> braced + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + let argspecs = map (\i -> ArgNum i) [1..numargs] + startcontents <- spaces >> bracedOrToken + endcontents <- spaces >> bracedOrToken + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ + | mtype == "newenvironment" -> do + report $ MacroAlreadyDefined name pos + return Nothing + | mtype == "provideenvironment" -> + return Nothing + _ -> return $ Just (name, + Macro ExpandWhenUsed argspecs optarg startcontents, + Macro ExpandWhenUsed [] Nothing endcontents) + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead ds of + Just i -> return i + _ -> return 0 diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs new file mode 100644 index 000000000..5b49a0376 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs @@ -0,0 +1,221 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Math + ( dollarsMath + , inlineEnvironments + , inlineEnvironment + , mathInline + , mathDisplay + , theoremstyle + , theoremEnvironment + , newtheorem + , proof + ) +where +import Data.Maybe (fromMaybe) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Builder as B +import qualified Data.Sequence as Seq +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (trimMath, stripTrailingNewlines) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import Control.Monad (guard, mzero) +import qualified Data.Map as M +import Data.Text (Text) + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + (do contents <- try $ untokenize <$> pDollarsMath 0 + if display + then mathDisplay contents <$ symbol '$' + else return $ mathInline contents) + <|> (guard display >> return (mathInline "")) + +-- Int is number of embedded groupings +pDollarsMath :: PandocMonad m => Int -> LP m [Tok] +pDollarsMath n = do + tk@(Tok _ toktype t) <- anyTok + case toktype of + Symbol | t == "$" + , n == 0 -> return [] + | t == "\\" -> do + tk' <- anyTok + (tk :) . (tk' :) <$> pDollarsMath n + | t == "{" -> (tk :) <$> pDollarsMath (n+1) + | t == "}" -> + if n > 0 + then (tk :) <$> pDollarsMath (n-1) + else mzero + _ -> (tk :) <$> pDollarsMath n + +mathDisplay :: Text -> Inlines +mathDisplay = displayMath . trimMath + +mathInline :: Text -> Inlines +mathInline = math . trimMath + +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" + +mathEnv :: PandocMonad m => Text -> LP m Text +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ untokenize res + +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name inlineEnvironments + +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") + , ("dmath", mathEnvWith id Nothing "dmath") + , ("dmath*", mathEnvWith id Nothing "dmath*") + , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") + , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") + , ("darray", mathEnvWith id (Just "aligned") "darray") + , ("darray*", mathEnvWith id (Just "aligned") "darray*") + ] + +theoremstyle :: PandocMonad m => LP m Blocks +theoremstyle = do + stylename <- untokenize <$> braced + let mbstyle = case stylename of + "plain" -> Just PlainStyle + "definition" -> Just DefinitionStyle + "remark" -> Just RemarkStyle + _ -> Nothing + case mbstyle of + Nothing -> return () + Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty } + return mempty + +newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks +newtheorem inline = do + number <- option True (False <$ symbol '*' <* sp) + name <- untokenize <$> braced + sp + series <- option Nothing $ Just . untokenize <$> bracketedToks + sp + showName <- tokWith inline + sp + syncTo <- option Nothing $ Just . untokenize <$> bracketedToks + sty <- sLastTheoremStyle <$> getState + let spec = TheoremSpec { theoremName = showName + , theoremStyle = sty + , theoremSeries = series + , theoremSyncTo = syncTo + , theoremNumber = number + , theoremLastNum = DottedNum [0] } + tmap <- sTheoremMap <$> getState + updateState $ \s -> s{ sTheoremMap = + M.insert name spec tmap } + return mempty + +theoremEnvironment :: PandocMonad m + => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks +theoremEnvironment blocks opt name = do + tmap <- sTheoremMap <$> getState + case M.lookup name tmap of + Nothing -> mzero + Just tspec -> do + optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt + mblabel <- option Nothing $ Just . untokenize <$> + try (spaces >> controlSeq "label" >> spaces >> braced) + bs <- env name blocks + number <- + if theoremNumber tspec + then do + let name' = fromMaybe name $ theoremSeries tspec + num <- getNextNumber + (maybe (DottedNum [0]) theoremLastNum . + M.lookup name' . sTheoremMap) + updateState $ \s -> + s{ sTheoremMap = + M.adjust + (\spec -> spec{ theoremLastNum = num }) + name' + (sTheoremMap s) + } + + case mblabel of + Just ident -> + updateState $ \s -> + s{ sLabels = M.insert ident + (B.toList $ + theoremName tspec <> "\160" <> + str (renderDottedNum num)) (sLabels s) } + Nothing -> return () + return $ space <> B.text (renderDottedNum num) + else return mempty + let titleEmph = case theoremStyle tspec of + PlainStyle -> B.strong + DefinitionStyle -> B.strong + RemarkStyle -> B.emph + let title = titleEmph (theoremName tspec <> number) + <> optTitle <> "." <> space + return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title + $ case theoremStyle tspec of + PlainStyle -> walk italicize bs + _ -> bs + + + +proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks +proof blocks opt = do + title <- option (B.text "Proof") opt + bs <- env "proof" blocks + return $ + B.divWith ("", ["proof"], []) $ + addQed $ addTitle (B.emph (title <> ".")) bs + +addTitle :: Inlines -> Blocks -> Blocks +addTitle ils bs = + case B.toList bs of + (Para xs : rest) + -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest) + _ -> B.para ils <> bs + +addQed :: Blocks -> Blocks +addQed bs = + case Seq.viewr (B.unMany bs) of + s Seq.:> Para ils + -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign)) + _ -> bs <> B.para qedSign + where + qedSign = B.str "\xa0\x25FB" + +italicize :: Block -> Block +italicize x@(Para [Image{}]) = x -- see #6925 +italicize (Para ils) = Para [Emph ils] +italicize (Plain ils) = Plain [Emph ils] +italicize x = x + + diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 563d32883..9dac4d6ef 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -1,11 +1,12 @@ {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.LaTeX.Parsing - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawLaTeXParser , applyMacros , tokenize + , tokenizeSources + , getInputTokens , untokenize , untoken , totoks , toksToString , satisfyTok + , parseFromToks + , disablingWithRaw , doMacros , doMacros' , setpos @@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , comment , anyTok , singleChar + , tokWith , specialChars , endline , blankline @@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawopt , overlaySpecification , getNextNumber + , label + , setCaption + , resetCaption + , env + , addMeta ) where import Control.Applicative (many, (<|>)) @@ -87,13 +98,15 @@ import Control.Monad.Trans (lift) import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord) import Data.Default import Data.List (intercalate) +import qualified Data.IntMap as IntMap import qualified Data.Map as M import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) -import Text.Pandoc.Error (PandocError (PandocMacroLoop)) +import Text.Pandoc.Error + (PandocError (PandocMacroLoop,PandocShouldNeverHappenError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, @@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Shared import Text.Parsec.Pos --- import Debug.Trace newtype DottedNum = DottedNum [Int] deriving (Show, Eq) @@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool , sToggles :: M.Map Text Bool - , sExpanded :: Bool + , sFileContents :: M.Map Text Text + , sEnableWithRaw :: Bool + , sRawTokens :: IntMap.IntMap [Tok] } deriving Show @@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sLabels = M.empty , sHasChapters = False , sToggles = M.empty - , sExpanded = False + , sFileContents = M.empty + , sEnableWithRaw = True + , sRawTokens = IntMap.empty } instance PandocMonad m => HasQuoteContext LaTeXState m where @@ -232,21 +248,25 @@ withVerbatimMode parser = do updateState $ \st -> st{ sVerbatimMode = False } return result -rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT Text s m (a, Text) + -> ParserT Sources s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } let lstate' = lstate { sMacros = extractMacros pstate } + let setStartPos = case toks of + Tok pos _ _ : _ -> setPosition pos + _ -> return () + let preparser = setStartPos >> parser let rawparser = (,) <$> withRaw valParser <*> getState - res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks + res' <- lift $ runParserT (withRaw (preparser >> getPosition)) + lstate "chunk" toks case res' of Left _ -> mzero - Right toks' -> do + Right (endpos, toks') -> do res <- lift $ runParserT (do when retokenize $ do -- retokenize, applying macros - doMacros ts <- many (satisfyTok (const True)) setInput ts rawparser) @@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do Left _ -> mzero Right ((val, raw), st) -> do updateState (updateMacros (sMacros st <>)) - _ <- takeP (T.length (untokenize toks')) + let skipTilPos stopPos = do + anyChar + pos <- getPosition + if pos >= stopPos + then return () + else skipTilPos stopPos + skipTilPos endpos let result = untokenize raw -- ensure we end with space if input did, see #4442 let result' = @@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => Text -> ParserT Text s m Text + => Text -> ParserT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState @@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> Left e -> Prelude.fail (show e) Right s' -> return s' +{- +When tokenize or untokenize change, test with this +QuickCheck property: + +> tokUntokRoundtrip :: String -> Bool +> tokUntokRoundtrip s = +> let t = T.pack s in untokenize (tokenize "random" t) == t +-} + +tokenizeSources :: Sources -> [Tok] +tokenizeSources = concatMap tokenizeSource . unSources + where + tokenizeSource (pos, t) = totoks pos t + +-- Return tokens from input sources. Ensure that starting position is +-- correct. +getInputTokens :: PandocMonad m => ParserT Sources s m [Tok] +getInputTokens = do + pos <- getPosition + ss <- getInput + return $ + case ss of + Sources [] -> [] + Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest) + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) @@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty toksToString :: [Tok] -> String toksToString = T.unpack . untokenize +parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a +parseFromToks parser toks = do + oldInput <- getInput + setInput toks + oldpos <- getPosition + case toks of + Tok pos _ _ : _ -> setPosition pos + _ -> return () + result <- disablingWithRaw parser + setInput oldInput + setPosition oldpos + return result + +disablingWithRaw :: PandocMonad m => LP m a -> LP m a +disablingWithRaw parser = do + oldEnableWithRaw <- sEnableWithRaw <$> getState + updateState $ \st -> st{ sEnableWithRaw = False } + result <- parser + updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw } + return result + satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok satisfyTok f = do doMacros -- apply macros on remaining input stream res <- tokenPrim (T.unpack . untoken) updatePos matcher - updateState $ \st -> st{ sExpanded = False } - return res + updateState $ \st -> + if sEnableWithRaw st + then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st } + else st + return $! res where matcher t | f t = Just t | otherwise = Nothing updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos updatePos _spos _ (Tok pos _ _ : _) = pos - updatePos spos _ [] = incSourceColumn spos 1 + updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t) doMacros :: PandocMonad m => LP m () doMacros = do - expanded <- sExpanded <$> getState - verbatimMode <- sVerbatimMode <$> getState - unless (expanded || verbatimMode) $ do - getInput >>= doMacros' 1 >>= setInput - updateState $ \st -> st{ sExpanded = True } + st <- getState + unless (sVerbatimMode st) $ + getInput >>= doMacros' 1 >>= setInput doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] doMacros' n inp = case inp of Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros n spos name ts + -> handleMacros n spos name ts <|> return inp Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros n spos ("end" <> name) ts + -> handleMacros n spos ("end" <> name) ts <|> return inp Tok _ (CtrlSeq "expandafter") _ : t : ts -> combineTok t <$> doMacros' n ts Tok spos (CtrlSeq name) _ : ts - -> handleMacros n spos name ts + -> handleMacros n spos name ts <|> return inp _ -> return inp - <|> return inp where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) @@ -482,7 +554,7 @@ doMacros' n inp = $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of - Nothing -> mzero + Nothing -> trySpecialMacro name ts Just (Macro expansionPoint argspecs optarg newtoks) -> do let getargs' = do args <- @@ -510,6 +582,41 @@ doMacros' n inp = ExpandWhenUsed -> doMacros' (n' + 1) result ExpandWhenDefined -> return result +-- | Certain macros do low-level tex manipulations that can't +-- be represented in our Macro type, so we handle them here. +trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok] +trySpecialMacro "xspace" ts = do + ts' <- doMacros' 1 ts + case ts' of + Tok pos Word t : _ + | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts' + _ -> return ts' +trySpecialMacro "iftrue" ts = handleIf True ts +trySpecialMacro "iffalse" ts = handleIf False ts +trySpecialMacro _ _ = mzero + +handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok] +handleIf b ts = do + res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts + case res' of + Left _ -> Prelude.fail "Could not parse conditional" + Right ts' -> return ts' + +ifParser :: PandocMonad m => Bool -> LP m [Tok] +ifParser b = do + ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi") + *> anyTok) + elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi")) + <|> ([] <$ controlSeq "fi") + rest <- getInput + return $ (if b then ifToks else elseToks) ++ rest + +startsWithAlphaNum :: Text -> Bool +startsWithAlphaNum t = + case T.uncons t of + Just (c, _) | isAlphaNum c -> True + _ -> False + setpos :: SourcePos -> Tok -> Tok setpos spos (Tok _ tt txt) = Tok spos tt txt @@ -592,18 +699,22 @@ isCommentTok _ = False anyTok :: PandocMonad m => LP m Tok anyTok = satisfyTok (const True) +singleCharTok :: PandocMonad m => LP m Tok +singleCharTok = + satisfyTok $ \case + Tok _ Word t -> T.length t == 1 + Tok _ Symbol t -> not (T.any (`Set.member` specialChars) t) + _ -> False + singleChar :: PandocMonad m => LP m Tok -singleChar = try $ do - Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) - guard $ not $ toktype == Symbol && - T.any (`Set.member` specialChars) t - if T.length t > 1 - then do - let (t1, t2) = (T.take 1 t, T.drop 1 t) - inp <- getInput - setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp - return $ Tok pos toktype t1 - else return $ Tok pos toktype t +singleChar = singleCharTok <|> singleCharFromWord + where + singleCharFromWord = do + Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp + anyTok specialChars :: Set.Set Char specialChars = Set.fromList "#$%&~_^\\{}" @@ -646,28 +757,25 @@ grouped parser = try $ do -- {{a,b}} should be parsed the same as {a,b} try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) -braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok] -braced' getTok n = - handleEgroup <|> handleBgroup <|> handleOther - where handleEgroup = do - t <- symbol '}' - if n == 1 - then return [] - else (t:) <$> braced' getTok (n - 1) - handleBgroup = do - t <- symbol '{' - (t:) <$> braced' getTok (n + 1) - handleOther = do - t <- getTok - (t:) <$> braced' getTok n +braced' :: PandocMonad m => LP m Tok -> LP m [Tok] +braced' getTok = symbol '{' *> go (1 :: Int) + where + go n = do + t <- getTok + case t of + Tok _ Symbol "}" + | n > 1 -> (t:) <$> go (n - 1) + | otherwise -> return [] + Tok _ Symbol "{" -> (t:) <$> go (n + 1) + _ -> (t:) <$> go n braced :: PandocMonad m => LP m [Tok] -braced = symbol '{' *> braced' anyTok 1 +braced = braced' anyTok -- URLs require special handling, because they can contain % -- characters. So we retonenize comments as we go... bracedUrl :: PandocMonad m => LP m [Tok] -bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1 +bracedUrl = braced' (retokenizeComment >> anyTok) -- For handling URLs, which allow literal % characters... retokenizeComment :: PandocMonad m => LP m () @@ -723,16 +831,29 @@ ignore raw = do withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) withRaw parser = do - inp <- getInput + rawTokensMap <- sRawTokens <$> getState + let key = case IntMap.lookupMax rawTokensMap of + Nothing -> 0 + Just (n,_) -> n + 1 + -- insert empty list at key + updateState $ \st -> st{ sRawTokens = + IntMap.insert key [] $ sRawTokens st } result <- parser - nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok) - let raw = takeWhile (\(Tok pos _ _) -> maybe True - (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp + mbRevToks <- IntMap.lookup key . sRawTokens <$> getState + raw <- case mbRevToks of + Just revtoks -> do + updateState $ \st -> st{ sRawTokens = + IntMap.delete key $ sRawTokens st} + return $ reverse revtoks + Nothing -> + throwError $ PandocShouldNeverHappenError $ + "sRawTokens has nothing at key " <> T.pack (show key) return (result, raw) keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do - Tok _ Word key <- satisfyTok isWordTok + key <- untokenize <$> many1 (notFollowedBy (symbol '=') >> + (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok)) sp val <- option mempty $ do symbol '=' @@ -792,7 +913,7 @@ getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of "write" -> do - void $ satisfyTok isWordTok -- digits + void $ many $ satisfyTok isDigitTok -- digits void braced "titleformat" -> do void braced @@ -807,6 +928,10 @@ getRawCommand name txt = do void $ many braced return $ txt <> untokenize rawargs +isDigitTok :: Tok -> Bool +isDigitTok (Tok _ Word t) = T.all isDigit t +isDigitTok _ = False + skipopts :: PandocMonad m => LP m () skipopts = skipMany (void overlaySpecification <|> void rawopt) @@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do Just n -> [n, 1] Nothing -> [1] +label :: PandocMonad m => LP m () +label = do + controlSeq "label" + t <- braced + updateState $ \st -> st{ sLastLabel = Just $ untokenize t } + +setCaption :: PandocMonad m => LP m Inlines -> LP m () +setCaption inline = try $ do + skipopts + ils <- tokWith inline + optional $ try $ spaces *> label + updateState $ \st -> st{ sCaption = Just ils } + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing + , sLastLabel = Nothing } + +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name + +tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines +tokWith inlineParser = try $ spaces >> + grouped inlineParser + <|> (lookAhead anyControlSeq >> inlineParser) + <|> singleChar' + where singleChar' = do + Tok _ _ t <- singleChar + return $ str t + +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index db9c276e7..b8bf0ce7f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -1,12 +1,7 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Readers.LaTeX.SIunitx - ( dosi - , doSI - , doSIrange - , doSInum - , doSInumlist - , doSIang - ) + ( siunitxCommands ) where import Text.Pandoc.Builder import Text.Pandoc.Readers.LaTeX.Parsing @@ -15,14 +10,32 @@ import Text.Pandoc.Class import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) import Control.Applicative ((<|>)) +import Control.Monad (void) import qualified Data.Map as M import Data.Char (isDigit) import Data.Text (Text) import qualified Data.Text as T import Data.List (intersperse) +import qualified Data.Sequence as Seq +import Text.Pandoc.Walk (walk) + +siunitxCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +siunitxCommands tok = M.fromList + [ ("si", dosi tok) + , ("SI", doSI tok) + , ("SIrange", doSIrange True tok) + , ("numrange", doSIrange False tok) + , ("numlist", doSInumlist) + , ("SIlist", doSIlist tok) + , ("num", doSInum) + , ("ang", doSIang) + ] dosi :: PandocMonad m => LP m Inlines -> LP m Inlines -dosi tok = grouped (siUnit tok) <|> siUnit tok +dosi tok = do + options <- option [] keyvals + grouped (siUnit options tok) <|> siUnit options tok -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" doSI :: PandocMonad m => LP m Inlines -> LP m Inlines @@ -57,23 +70,50 @@ doSInumlist = do mconcat (intersperse (str "," <> space) (init xs)) <> text ", & " <> last xs +doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines +doSIlist tok = do + options <- option [] keyvals + nums <- map tonum . T.splitOn ";" . untokenize <$> braced + unit <- grouped (siUnit options tok) <|> siUnit options tok + let xs = map (<> (str "\xa0" <> unit)) nums + case xs of + [] -> return mempty + [x] -> return x + _ -> return $ + mconcat (intersperse (str "," <> space) (init xs)) <> + text ", & " <> last xs + parseNum :: Parser Text () Inlines parseNum = (mconcat <$> many parseNumPart) <* eof +minus :: Text +minus = "\x2212" + +hyphenToMinus :: Inline -> Inline +hyphenToMinus (Str t) = Str (T.replace "-" minus t) +hyphenToMinus x = x + parseNumPart :: Parser Text () Inlines parseNumPart = parseDecimalNum <|> parseComma <|> parsePlusMinus <|> + parsePM <|> parseI <|> parseExp <|> parseX <|> parseSpace where - parseDecimalNum = do - pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') - basenum <- (pref <>) . T.pack - <$> many1 (satisfy (\c -> isDigit c || c == '.')) + parseDecimalNum, parsePlusMinus, parsePM, + parseComma, parseI, parseX, + parseExp, parseSpace :: Parser Text () Inlines + parseDecimalNum = try $ do + pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-') + basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) + let basenum = pref <> T.pack + (case basenum' of + '.':_ -> '0':basenum' + _ -> basenum') uncertainty <- option mempty $ T.pack <$> parseParens if T.null uncertainty then return $ str basenum @@ -91,6 +131,7 @@ parseNumPart = | otherwise -> "." <> t parseComma = str "." <$ char ',' parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-") + parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm") parseParens = char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')' parseI = str "i" <$ char 'i' @@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines doSIang = do skipopts ps <- T.splitOn ";" . untokenize <$> braced + let dropPlus t = case T.uncons t of + Just ('+',t') -> t' + _ -> t case ps ++ repeat "" of (d:m:s:_) -> return $ - (if T.null d then mempty else str d <> str "\xb0") <> - (if T.null m then mempty else str m <> str "\x2032") <> - (if T.null s then mempty else str s <> str "\x2033") + (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <> + (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <> + (if T.null s then mempty else str (dropPlus s) <> str "\x2033") _ -> return mempty -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms" @@ -136,40 +180,99 @@ doSIrange includeUnits tok = do emptyOr160 :: Inlines -> Inlines emptyOr160 x = if x == mempty then x else str "\160" -siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines -siUnit tok = try (do - Tok _ (CtrlSeq name) _ <- anyControlSeq - case name of - "square" -> do - unit <- siUnit tok - return $ unit <> superscript "2" - "cubic" -> do - unit <- siUnit tok - return $ unit <> superscript "3" - "raisetothe" -> do - n <- tok - unit <- siUnit tok - return $ unit <> superscript n - _ -> - case M.lookup name siUnitMap of - Just il -> - option il $ - choice - [ (il <> superscript "2") <$ controlSeq "squared" - , (il <> superscript "3") <$ controlSeq "cubed" - , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok) - ] - Nothing -> fail "not an siunit unit command") - <|> (lookAhead anyControlSeq >> tok) - <|> (do Tok _ Word t <- satisfyTok isWordTok - return $ str t) - <|> (symbol '^' *> (superscript <$> tok)) - <|> (symbol '_' *> (subscript <$> tok)) - <|> ("\xa0" <$ symbol '.') - <|> ("\xa0" <$ symbol '~') - <|> tok - <|> (do Tok _ _ t <- anyTok - return (str t)) +siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines +siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart + where + siUnitPart :: LP m Inlines + siUnitPart = try $ do + skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1) + x <- ((siPrefix <*> siBase) + <|> (do u <- siBase <|> tok + option u $ siSuffix <*> pure u)) + option x (siInfix x) + siInfix :: Inlines -> LP m Inlines + siInfix u1 = try $ + (do _ <- controlSeq "per" + u2 <- siUnitPart + let useSlash = lookup "per-mode" options == Just "symbol" + if useSlash + then return (u1 <> str "/" <> u2) + else return (u1 <> str "\xa0" <> negateExponent u2)) + <|> (do _ <- symbol '/' + u2 <- siUnitPart + return (u1 <> str "/" <> u2)) + siPrefix :: LP m (Inlines -> Inlines) + siPrefix = + (do _ <- controlSeq "square" + skipopts + return (<> superscript "2")) + <|> (do _ <- controlSeq "cubic" + skipopts + return (<> superscript "3")) + <|> (do _ <- controlSeq "raisetothe" + skipopts + n <- walk hyphenToMinus <$> tok + return (<> superscript n)) + siSuffix :: LP m (Inlines -> Inlines) + siSuffix = + (do _ <- controlSeq "squared" + skipopts + return (<> superscript "2")) + <|> (do _ <- controlSeq "cubed" + skipopts + return (<> superscript "3")) + <|> (do _ <- controlSeq "tothe" + skipopts + n <- walk hyphenToMinus <$> tok + return (<> superscript n)) + <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok + return (<> superscript n))) + <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok + return (<> subscript n))) + negateExponent :: Inlines -> Inlines + negateExponent ils = + case Seq.viewr (unMany ils) of + xs Seq.:> Superscript ss -> (Many xs) <> + superscript (str minus <> fromList ss) + _ -> ils <> superscript (str (minus <> "1")) + siBase :: LP m Inlines + siBase = + ((try + (do Tok _ (CtrlSeq name) _ <- anyControlSeq + case M.lookup name siUnitModifierMap of + Just il -> (il <>) <$> siBase + Nothing -> + case M.lookup name siUnitMap of + Just il -> pure il + Nothing -> fail "not a unit command")) + <|> (do Tok _ Word t <- satisfyTok isWordTok + return $ str t) + ) + +siUnitModifierMap :: M.Map Text Inlines +siUnitModifierMap = M.fromList + [ ("atto", str "a") + , ("centi", str "c") + , ("deca", str "d") + , ("deci", str "d") + , ("deka", str "d") + , ("exa", str "E") + , ("femto", str "f") + , ("giga", str "G") + , ("hecto", str "h") + , ("kilo", str "k") + , ("mega", str "M") + , ("micro", str "μ") + , ("milli", str "m") + , ("nano", str "n") + , ("peta", str "P") + , ("pico", str "p") + , ("tera", str "T") + , ("yocto", str "y") + , ("yotta", str "Y") + , ("zepto", str "z") + , ("zetta", str "Z") + ] siUnitMap :: M.Map Text Inlines siUnitMap = M.fromList @@ -269,7 +372,6 @@ siUnitMap = M.fromList , ("arcsecond", str "″") , ("astronomicalunit", str "ua") , ("atomicmassunit", str "u") - , ("atto", str "a") , ("bar", str "bar") , ("barn", str "b") , ("becquerel", str "Bq") @@ -277,51 +379,38 @@ siUnitMap = M.fromList , ("bohr", emph (str "a") <> subscript (str "0")) , ("candela", str "cd") , ("celsius", str "°C") - , ("centi", str "c") , ("clight", emph (str "c") <> subscript (str "0")) , ("coulomb", str "C") , ("dalton", str "Da") , ("day", str "d") - , ("deca", str "d") - , ("deci", str "d") , ("decibel", str "db") , ("degreeCelsius",str "°C") , ("degree", str "°") - , ("deka", str "d") , ("electronmass", emph (str "m") <> subscript (str "e")) , ("electronvolt", str "eV") , ("elementarycharge", emph (str "e")) - , ("exa", str "E") , ("farad", str "F") - , ("femto", str "f") - , ("giga", str "G") , ("gram", str "g") , ("gray", str "Gy") , ("hartree", emph (str "E") <> subscript (str "h")) , ("hectare", str "ha") - , ("hecto", str "h") , ("henry", str "H") , ("hertz", str "Hz") , ("hour", str "h") , ("joule", str "J") , ("katal", str "kat") , ("kelvin", str "K") - , ("kilo", str "k") , ("kilogram", str "kg") , ("knot", str "kn") , ("liter", str "L") , ("litre", str "l") , ("lumen", str "lm") , ("lux", str "lx") - , ("mega", str "M") , ("meter", str "m") , ("metre", str "m") - , ("micro", str "μ") - , ("milli", str "m") , ("minute", str "min") , ("mmHg", str "mmHg") , ("mole", str "mol") - , ("nano", str "n") , ("nauticalmile", str "M") , ("neper", str "Np") , ("newton", str "N") @@ -329,25 +418,17 @@ siUnitMap = M.fromList , ("Pa", str "Pa") , ("pascal", str "Pa") , ("percent", str "%") - , ("per", str "/") - , ("peta", str "P") - , ("pico", str "p") , ("planckbar", emph (str "\x210f")) , ("radian", str "rad") , ("second", str "s") , ("siemens", str "S") , ("sievert", str "Sv") , ("steradian", str "sr") - , ("tera", str "T") , ("tesla", str "T") , ("tonne", str "t") , ("volt", str "V") , ("watt", str "W") , ("weber", str "Wb") - , ("yocto", str "y") - , ("yotta", str "Y") - , ("zepto", str "z") - , ("zetta", str "Z") ] diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs new file mode 100644 index 000000000..f56728fe1 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -0,0 +1,379 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Table + ( tableEnvironments ) +where + +import Data.Functor (($>)) +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import qualified Data.Text as T +import Control.Applicative ((<|>), optional, many) +import Control.Monad (when, void) +import Text.Pandoc.Shared (safeRead, trim) +import Text.Pandoc.Logging (LogMessage(SkippedContent)) +import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) + +tableEnvironments :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> M.Map Text (LP m Blocks) +tableEnvironments blocks inline = + M.fromList + [ ("longtable", env "longtable" $ + resetCaption *> + simpTable blocks inline "longtable" False >>= addTableCaption) + , ("table", env "table" $ + skipopts *> resetCaption *> blocks >>= addTableCaption) + , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True) + , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False) + ] + +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + (controlSeq "cline" <* braced) <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional rawopt + return () + +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") + <* skipopts <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' + +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest + _ -> return () + +parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] +parseAligns = try $ do + let maybeBar = skipMany + (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced + let colWidth = try $ do + symbol '{' + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") + spaces + symbol '}' + return $ safeRead ds + let alignSpec = do + pref <- option [] alignPrefix + spaces + al <- alignChar + width <- colWidth <|> option Nothing (do s <- untokenize <$> braced + pos <- getPosition + report $ SkippedContent s pos + return Nothing) + spaces + suff <- option [] alignSuffix + return (al, width, (pref, suff)) + let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro + symbol '*' + spaces + ds <- trim . untokenize <$> braced + spaces + spec <- braced + case safeRead ds of + Just n -> + getInput >>= setInput . (mconcat (replicate n spec) ++) + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" + bgroup + spaces + maybeBar + aligns' <- many $ try $ spaces >> optional starAlign >> + (alignSpec <* maybeBar) + spaces + egroup + spaces + return $ map toSpec aligns' + where + toColWidth (Just w) | w > 0 = ColWidth w + toColWidth _ = ColWidthDefault + toSpec (x, y, z) = (x, toColWidth y, z) + +-- N.B. this parser returns a Row that may have erroneous empty cells +-- in it. See the note above fixTableHead for details. +parseTableRow :: PandocMonad m + => LP m Blocks -- ^ block parser + -> LP m Inlines -- ^ inline parser + -> Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes + -> LP m Row +parseTableRow blocks inline envname prefsufs = do + notFollowedBy (spaces *> end_ envname) + -- contexts that can contain & that is not colsep: + let canContainAmp (Tok _ (CtrlSeq "begin") _) = True + canContainAmp (Tok _ (CtrlSeq "verb") _) = True + canContainAmp (Tok _ (CtrlSeq "Verb") _) = True + canContainAmp _ = False + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- mconcat <$> + many ( snd <$> withRaw + ((lookAhead (controlSeq "parbox") >> + void blocks) -- #5711 + <|> + (lookAhead (satisfyTok canContainAmp) >> void inline) + <|> + (lookAhead (symbol '$') >> void inline)) + <|> + (do notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + count 1 anyTok) ) + + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff + rawcells <- mapM celltoks prefsufs + cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells + spaces + return $ Row nullAttr cells + +parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell +parseTableCell blocks = do + spaces + updateState $ \st -> st{ sInTableCell = True } + cell' <- multicolumnCell blocks + <|> multirowCell blocks + <|> parseSimpleCell + <|> parseEmptyCell + updateState $ \st -> st{ sInTableCell = False } + spaces + return cell' + where + -- The parsing of empty cells is important in LaTeX, especially when dealing + -- with multirow/multicolumn. See #6603. + parseEmptyCell = spaces $> emptyCell + parseSimpleCell = simpleCell <$> (plainify <$> blocks) + + +cellAlignment :: PandocMonad m => LP m Alignment +cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') + where + alignment = do + c <- untoken <$> singleChar + return $ case c of + "l" -> AlignLeft + "r" -> AlignRight + "c" -> AlignCenter + "*" -> AlignDefault + _ -> AlignDefault + +plainify :: Blocks -> Blocks +plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + +multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell +multirowCell blocks = controlSeq "multirow" >> do + -- Full prototype for \multirow macro is: + -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} + -- However, everything except `nrows` and `text` make + -- sense in the context of the Pandoc AST + _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position + nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related + _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning + content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' + return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content + +multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell +multicolumnCell blocks = controlSeq "multicolumn" >> do + span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced + alignment <- symbol '{' *> cellAlignment <* symbol '}' + + let singleCell = do + content <- plainify <$> blocks + return $ cell alignment (RowSpan 1) (ColSpan span') content + + -- Two possible contents: either a \multirow cell, or content. + -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} + -- Note that a \multirow cell can be nested in a \multicolumn, + -- but not the other way around. See #6603 + let nestedCell = do + (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks + return $ cell + alignment + (RowSpan rs) + (ColSpan span') + (fromList bs) + + symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' + +-- LaTeX tables are stored with empty cells underneath multirow cells +-- denoting the grid spaces taken up by them. More specifically, if a +-- cell spans m rows, then it will overwrite all the cells in the +-- columns it spans for (m-1) rows underneath it, requiring padding +-- cells in these places. These padding cells need to be removed for +-- proper table reading. See #6603. +-- +-- These fixTable functions do not otherwise fix up malformed +-- input tables: that is left to the table builder. +fixTableHead :: TableHead -> TableHead +fixTableHead (TableHead attr rows) = TableHead attr rows' + where + rows' = fixTableRows rows + +fixTableBody :: TableBody -> TableBody +fixTableBody (TableBody attr rhc th tb) + = TableBody attr rhc th' tb' + where + th' = fixTableRows th + tb' = fixTableRows tb + +fixTableRows :: [Row] -> [Row] +fixTableRows = fixTableRows' $ repeat Nothing + where + fixTableRows' oldHang (Row attr cells : rs) + = let (newHang, cells') = fixTableRow oldHang cells + rs' = fixTableRows' newHang rs + in Row attr cells' : rs' + fixTableRows' _ [] = [] + +-- The overhang is represented as Just (relative cell dimensions) or +-- Nothing for an empty grid space. +fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) +fixTableRow oldHang cells + -- If there's overhang, drop cells until their total width meets the + -- width of the occupied grid spaces (or we run out) + | (n, prefHang, restHang) <- splitHang oldHang + , n > 0 + = let cells' = dropToWidth getCellW n cells + (restHang', cells'') = fixTableRow restHang cells' + in (prefHang restHang', cells'') + -- Otherwise record the overhang of a pending cell and fix the rest + -- of the row + | c@(Cell _ _ h w _):cells' <- cells + = let h' = max 1 h + w' = max 1 w + oldHang' = dropToWidth getHangW w' oldHang + (newHang, cells'') = fixTableRow oldHang' cells' + in (toHang w' h' <> newHang, c : cells'') + | otherwise + = (oldHang, []) + where + getCellW (Cell _ _ _ w _) = w + getHangW = maybe 1 fst + getCS (ColSpan n) = n + + toHang c r + | r > 1 = [Just (c, r)] + | otherwise = replicate (getCS c) Nothing + + -- Take the prefix of the overhang list representing filled grid + -- spaces. Also return the remainder and the length of this prefix. + splitHang = splitHang' 0 id + + splitHang' !n l (Just (c, r):xs) + = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs + splitHang' n l xs = (n, l, xs) + + -- Drop list items until the total width of the dropped items + -- exceeds the passed width. + dropToWidth _ n l | n < 1 = l + dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs + dropToWidth _ _ [] = [] + +simpTable :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> Text + -> Bool + -> LP m Blocks +simpTable blocks inline envname hasWidthParameter = try $ do + when hasWidthParameter $ () <$ tokWith inline + skipopts + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + skipMany hline + spaces + header' <- option [] . try . fmap (:[]) $ + parseTableRow blocks inline envname prefsufs <* + lbreak <* many1 hline + spaces + rows <- sepEndBy (parseTableRow blocks inline envname prefsufs) + (lbreak <* optional (skipMany hline)) + spaces + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + lookAhead $ controlSeq "end" -- make sure we're at end + let th = fixTableHead $ TableHead nullAttr header' + let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] + let tf = TableFoot nullAttr [] + return $ table emptyCaption (zip aligns widths) th tbs tf + +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table attr c spec th tb tf) = do + st <- getState + let mblabel = sLastLabel st + capt <- case (sCaption st, mblabel) of + (Just ils, Nothing) -> return $ caption Nothing (plain ils) + (Just ils, Just lab) -> do + num <- getNextNumber sLastTableNum + setState + st{ sLastTableNum = num + , sLabels = M.insert lab + [Str (renderDottedNum num)] + (sLabels st) } + return $ caption Nothing (plain ils) -- add number?? + (Nothing, _) -> return c + let attr' = case (attr, mblabel) of + ((_,classes,kvs), Just ident) -> + (ident,classes,kvs) + _ -> attr + return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf + go x = return x + +-- TODO: For now we add a Div to contain table attributes, since +-- most writers don't do anything yet with attributes on Table. +-- This can be removed when that changes. +addAttrDiv :: Attr -> Block -> Block +addAttrDiv ("",[],[]) b = b +addAttrDiv attr b = Div attr [b] diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index a017a2afb..c20b72bc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,6 +1,7 @@ +{-# LANGUAGE FlexibleInstances #-} {- | Module : Text.Pandoc.Readers.LaTeX.Types - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) ) where import Data.Text (Text) -import Text.Parsec.Pos (SourcePos) +import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Pandoc.Sources +import Data.List (groupBy) data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | Esc1 | Esc2 | Arg Int @@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | data Tok = Tok SourcePos TokType Text deriving (Eq, Ord, Show) +instance ToSources [Tok] where + toSources = Sources + . map (\ts -> case ts of + Tok p _ _ : _ -> (p, mconcat $ map tokToText ts) + _ -> error "toSources [Tok] encountered empty group") + . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2) + +tokToText :: Tok -> Text +tokToText (Tok _ _ t) = t + data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) |
