aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/LaTeX
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:10:34 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:46:16 +0200
commit48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Readers/LaTeX
parent0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent46099e79defe662e541b12548200caf29063c1c6 (diff)
downloadpandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Citation.hs210
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Inline.hs397
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs321
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Macro.hs184
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Math.hs221
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs273
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/SIunitx.hs223
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Table.hs379
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs17
9 files changed, 1973 insertions, 252 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..af97125c6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+ ( citationCommands
+ , cites
+ )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+ let citation = citationWith inline
+ tok = spaces *> grouped inline
+ in M.fromList
+ [ ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation inline NormalCitation)
+ , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+ complexNatbibCitation inline AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> manyTill citationLabel egroup
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+ opt :: PandocMonad m => LP m Inlines
+ opt = do
+ toks <- try (sp *> bracketedToks <* sp)
+ -- now parse the toks as inlines
+ st <- getState
+ parsed <- lift $
+ runParserT (mconcat <$> many inline) st "bracketed option" toks
+ case parsed of
+ Right result -> return result
+ Left e -> throwError $ PandocParsecError (toSources toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel = do
+ sp
+ untokenize <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* sp
+ <* optional (symbol ',')
+ <* sp)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+ => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+ let paropt = parenWrapped inline
+ cits <- if multi
+ then do
+ multiprenote <- optionMaybe $ toList <$> paropt
+ multipostnote <- optionMaybe $ toList <$> paropt
+ let (pre, suf) = case (multiprenote, multipostnote) of
+ (Just s , Nothing) -> (mempty, s)
+ (Nothing , Just t) -> (mempty, t)
+ (Just s , Just t ) -> (s, t)
+ _ -> (mempty, mempty)
+ tempCits <- many1 $ simpleCiteArgs inline
+ case tempCits of
+ (k:ks) -> case ks of
+ (_:_) -> return $ (addMprenote pre k : init ks) ++
+ [addMpostnote suf (last ks)]
+ _ -> return [addMprenote pre (addMpostnote suf k)]
+ _ -> return [[]]
+ else count 1 $ simpleCiteArgs inline
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+ where mprenote (k:ks) = (k:ks) ++ [Space]
+ mprenote _ = mempty
+ mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+ mpostnote _ = mempty
+ addMprenote mpn (k:ks) =
+ let mpnfinal = case citationPrefix k of
+ (_:_) -> mprenote mpn
+ _ -> mpn
+ in addPrefix mpnfinal (k:ks)
+ addMprenote _ _ = []
+ addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+ => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+ (c,raw) <- withRaw $ cites inline mode multi
+ return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m
+ => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` symbol ';'
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..7b8bca4af
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,397 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Inline
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+ ( acronymCommands
+ , verbCommands
+ , charCommands
+ , accentCommands
+ , nameCommands
+ , biblatexInlineCommands
+ , refCommands
+ , rawInlineOr
+ , listingsLanguage
+ )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+ manyTill, getInput, setInput, incSourceColumn,
+ option, many1, try)
+import Data.Char (isDigit)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList, fromMaybe)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+ else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+ v <- braced
+ let refstr = untokenize v
+ updateState $ \st ->
+ st{ sLastLabel = Just refstr }
+ return $ spanWith (refstr,[],[("label", refstr)])
+ $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+ v <- braced
+ let refstr = untokenize v
+ return $ linkWith ("",[],[ ("reference-type", cls)
+ , ("reference", refstr)])
+ ("#" <> refstr)
+ ""
+ (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ code . untokenize <$>
+ manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok pos toktype txt) <- anyTok
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+ : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+ return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+ case lookup "language" opts of
+ Nothing -> Nothing
+ Just l -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+ options <- option [] keyvals
+ let classes = maybeToList $ listingsLanguage options
+ doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+ skipopts
+ cls <- untokenize <$> braced
+ doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x = x
+
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+ str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+ str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+ where
+ inBraces = do
+ symbol '{'
+ spaces
+ res <- parser
+ spaces
+ symbol '}'
+ return res
+ parser = do
+ s <- untokenize <$> many1 (satisfyTok isWordTok)
+ let (digits, rest) = T.span isDigit s
+ unless (T.null rest) $
+ Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+ safeRead digits
+
+accentWith :: PandocMonad m
+ => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+ ils <- tok
+ case toList ils of
+ (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+ -- try to normalize to the combined character:
+ Str (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent]) <> xs) : ys
+ [Space] -> return $ str $ T.singleton
+ $ fromMaybe combiningAccent fallBack
+ [] -> return $ str $ T.singleton
+ $ fromMaybe combiningAccent fallBack
+ _ -> return ils
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+ [ ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("mintinline", domintinline)
+ , ("Verb", doverb)
+ ]
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+ let accent = accentWith tok
+ in M.fromList
+ [ ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", accent '\779' Nothing) -- hungarumlaut
+ , ("`", accent '\768' (Just '`')) -- grave
+ , ("'", accent '\769' (Just '\'')) -- acute
+ , ("^", accent '\770' (Just '^')) -- circ
+ , ("~", accent '\771' (Just '~')) -- tilde
+ , ("\"", accent '\776' Nothing) -- umlaut
+ , (".", accent '\775' Nothing) -- dot
+ , ("=", accent '\772' Nothing) -- macron
+ , ("|", accent '\781' Nothing) -- vertical line above
+ , ("b", accent '\817' Nothing) -- macron below
+ , ("c", accent '\807' Nothing) -- cedilla
+ , ("G", accent '\783' Nothing) -- doublegrave
+ , ("h", accent '\777' Nothing) -- hookabove
+ , ("d", accent '\803' Nothing) -- dotbelow
+ , ("f", accent '\785' Nothing) -- inverted breve
+ , ("r", accent '\778' Nothing) -- ringabove
+ , ("t", accent '\865' Nothing) -- double inverted breve
+ , ("U", accent '\782' Nothing) -- double vertical line above
+ , ("v", accent '\780' Nothing) -- hacek
+ , ("u", accent '\774' Nothing) -- breve
+ , ("k", accent '\808' Nothing) -- ogonek
+ , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+ , ("i", lit "ı") -- dotless i
+ , ("j", lit "ȷ") -- dotless j
+ , ("newtie", accent '\785' Nothing) -- inverted breve
+ , ("textcircled", accent '\8413' Nothing) -- combining circle
+ ]
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+ [ ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("sep", lit ",")
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ , ("qed", lit "\a0\x25FB")
+ , ("lq", return (str "‘"))
+ , ("rq", return (str "’"))
+ , ("textquoteleft", return (str "‘"))
+ , ("textquoteright", return (str "’"))
+ , ("textquotedblleft", return (str "“"))
+ , ("textquotedblright", return (str "”"))
+ , ("/", pure mempty) -- italic correction
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional rawopt
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ ]
+
+biblatexInlineCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+ -- biblatex misc
+ [ ("RN", romanNumeralUpper)
+ , ("Rn", romanNumeralLower)
+ , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+ , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+ , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+ , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+ , ("mkbibparens",
+ spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+ , ("mkbibbrackets",
+ spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+ , ("autocap", spanWith nullAttr <$> tok)
+ , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+ , ("bibstring",
+ (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+ <$> braced)
+ , ("adddot", pure (str "."))
+ , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+ , ("addabbrvspace", pure space)
+ , ("hyphen", pure (str "-"))
+ ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+ [ ("figurename", doTerm Translations.Figure)
+ , ("prefacename", doTerm Translations.Preface)
+ , ("refname", doTerm Translations.References)
+ , ("bibname", doTerm Translations.Bibliography)
+ , ("chaptername", doTerm Translations.Chapter)
+ , ("partname", doTerm Translations.Part)
+ , ("contentsname", doTerm Translations.Contents)
+ , ("listfigurename", doTerm Translations.ListOfFigures)
+ , ("listtablename", doTerm Translations.ListOfTables)
+ , ("indexname", doTerm Translations.Index)
+ , ("abstractname", doTerm Translations.Abstract)
+ , ("tablename", doTerm Translations.Table)
+ , ("enclname", doTerm Translations.Encl)
+ , ("ccname", doTerm Translations.Cc)
+ , ("headtoname", doTerm Translations.To)
+ , ("pagename", doTerm Translations.Page)
+ , ("seename", doTerm Translations.See)
+ , ("seealsoname", doTerm Translations.SeeAlso)
+ , ("proofname", doTerm Translations.Proof)
+ , ("glossaryname", doTerm Translations.Glossary)
+ , ("lstlistingname", doTerm Translations.Listing)
+ ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+ [ ("label", rawInlineOr "label" dolabel)
+ , ("ref", rawInlineOr "ref" $ doref "ref")
+ , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty
+ , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty
+ , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty
+ ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+ -- glossaries package
+ [ ("gls", doAcronym "short")
+ , ("Gls", doAcronym "short")
+ , ("glsdesc", doAcronym "long")
+ , ("Glsdesc", doAcronym "long")
+ , ("GLSdesc", doAcronym "long")
+ , ("acrlong", doAcronym "long")
+ , ("Acrlong", doAcronym "long")
+ , ("acrfull", doAcronym "full")
+ , ("Acrfull", doAcronym "full")
+ , ("acrshort", doAcronym "abbrv")
+ , ("Acrshort", doAcronym "abbrv")
+ , ("glspl", doAcronymPlural "short")
+ , ("Glspl", doAcronymPlural "short")
+ , ("glsdescplural", doAcronymPlural "long")
+ , ("Glsdescplural", doAcronymPlural "long")
+ , ("GLSdescplural", doAcronymPlural "long")
+ -- acronyms package
+ , ("ac", doAcronym "short")
+ , ("acf", doAcronym "full")
+ , ("acs", doAcronym "abbrv")
+ , ("acl", doAcronym "long")
+ , ("acp", doAcronymPlural "short")
+ , ("acfp", doAcronymPlural "full")
+ , ("acsp", doAcronymPlural "abbrv")
+ , ("aclp", doAcronymPlural "long")
+ , ("Ac", doAcronym "short")
+ , ("Acf", doAcronym "full")
+ , ("Acs", doAcronym "abbrv")
+ , ("Acl", doAcronym "long")
+ , ("Acp", doAcronymPlural "short")
+ , ("Acfp", doAcronymPlural "full")
+ , ("Acsp", doAcronymPlural "abbrv")
+ , ("Aclp", doAcronymPlural "long")
+ ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+ acro <- braced
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "singular+" <> form)])
+ $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+ acro <- braced
+ let plural = str "s"
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "plural+" <> form)]) $
+ mconcat [str $ untokenize acro, plural]]
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Lang
- Copyright : Copyright (C) 2018-2020 John MacFarlane
+ Copyright : Copyright (C) 2018-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to
BCP47 'Lang'.
-}
module Text.Pandoc.Readers.LaTeX.Lang
- ( polyglossiaLangToBCP47
+ ( setDefaultLanguage
+ , polyglossiaLangToBCP47
, babelLangToBCP47
+ , enquoteCommands
+ , inlineLanguageCommands
)
where
import qualified Data.Map as M
+import Data.Text (Text)
import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.Shared (extractSpaces)
+import Text.Collate.Lang (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+ withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+ singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+ => LP m Inlines
+ -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+ skipopts
+ let lang = mblang >>= babelLangToBCP47
+ let langspan = case lang of
+ Nothing -> id
+ Just l -> spanWith ("",[],[("lang", renderLang l)])
+ quoteContext <- sQuoteContext <$> getState
+ if starred || quoteContext == InDoubleQuote
+ then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+ [ ("enquote*", enquote tok True Nothing)
+ , ("enquote", enquote tok False Nothing)
+ -- foreignquote is supposed to use native quote marks
+ , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+ -- hypehnquote uses regular quotes
+ , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+ ]
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+ babelLang <- untokenize <$> braced
+ case babelLangToBCP47 babelLang of
+ Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
+ _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+ M.fromList $
+ ("foreignlanguage", foreignlanguage tok) :
+ (mk <$> M.toList polyglossiaLangToBCP47)
+ where
+ mk (polyglossia, bcp47Func) =
+ ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+ => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ let lang = renderLang $ bcp47Func o
+ extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ polylang <- untokenize <$> braced
+ case M.lookup polylang polyglossiaLangToBCP47 of
+ Nothing -> return mempty -- TODO mzero? warning?
+ Just langFunc -> do
+ let l = langFunc o
+ setTranslations l
+ updateState $ setMeta "lang" $ str (renderLang l)
+ return mempty
polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
polyglossiaLangToBCP47 = M.fromList
[ ("arabic", \o -> case T.filter (/=' ') o of
- "locale=algeria" -> Lang "ar" "" "DZ" []
- "locale=mashriq" -> Lang "ar" "" "SY" []
- "locale=libya" -> Lang "ar" "" "LY" []
- "locale=morocco" -> Lang "ar" "" "MA" []
- "locale=mauritania" -> Lang "ar" "" "MR" []
- "locale=tunisia" -> Lang "ar" "" "TN" []
- _ -> Lang "ar" "" "" [])
+ "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] []
+ "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] []
+ "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] []
+ "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] []
+ "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+ "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] []
+ _ -> Lang "ar" Nothing (Just "") [] [] [])
, ("german", \o -> case T.filter (/=' ') o of
- "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
"variant=austrian,spelling=old"
- -> Lang "de" "" "AT" ["1901"]
- "variant=austrian" -> Lang "de" "" "AT" []
+ -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
"variant=swiss,spelling=old"
- -> Lang "de" "" "CH" ["1901"]
- "variant=swiss" -> Lang "de" "" "CH" []
- _ -> Lang "de" "" "" [])
- , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+ _ -> Lang "de" Nothing Nothing [] [] [])
+ , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
, ("greek", \o -> case T.filter (/=' ') o of
- "variant=poly" -> Lang "el" "" "polyton" []
- "variant=ancient" -> Lang "grc" "" "" []
- _ -> Lang "el" "" "" [])
+ "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] []
+ "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+ _ -> Lang "el" Nothing Nothing [] [] [])
, ("english", \o -> case T.filter (/=' ') o of
- "variant=australian" -> Lang "en" "" "AU" []
- "variant=canadian" -> Lang "en" "" "CA" []
- "variant=british" -> Lang "en" "" "GB" []
- "variant=newzealand" -> Lang "en" "" "NZ" []
- "variant=american" -> Lang "en" "" "US" []
- _ -> Lang "en" "" "" [])
- , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+ "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] []
+ "variant=british" -> Lang "en" Nothing (Just "GB") [] [] []
+ "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+ "variant=american" -> Lang "en" Nothing (Just "US") [] [] []
+ _ -> Lang "en" Nothing (Just "") [] [] [])
+ , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
, ("latin", \o -> case T.filter (/=' ') o of
- "variant=classic" -> Lang "la" "" "" ["x-classic"]
- _ -> Lang "la" "" "" [])
- , ("slovenian", \_ -> Lang "sl" "" "" [])
- , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
- , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
- , ("afrikaans", \_ -> Lang "af" "" "" [])
- , ("amharic", \_ -> Lang "am" "" "" [])
- , ("assamese", \_ -> Lang "as" "" "" [])
- , ("asturian", \_ -> Lang "ast" "" "" [])
- , ("bulgarian", \_ -> Lang "bg" "" "" [])
- , ("bengali", \_ -> Lang "bn" "" "" [])
- , ("tibetan", \_ -> Lang "bo" "" "" [])
- , ("breton", \_ -> Lang "br" "" "" [])
- , ("catalan", \_ -> Lang "ca" "" "" [])
- , ("welsh", \_ -> Lang "cy" "" "" [])
- , ("czech", \_ -> Lang "cs" "" "" [])
- , ("coptic", \_ -> Lang "cop" "" "" [])
- , ("danish", \_ -> Lang "da" "" "" [])
- , ("divehi", \_ -> Lang "dv" "" "" [])
- , ("esperanto", \_ -> Lang "eo" "" "" [])
- , ("spanish", \_ -> Lang "es" "" "" [])
- , ("estonian", \_ -> Lang "et" "" "" [])
- , ("basque", \_ -> Lang "eu" "" "" [])
- , ("farsi", \_ -> Lang "fa" "" "" [])
- , ("finnish", \_ -> Lang "fi" "" "" [])
- , ("french", \_ -> Lang "fr" "" "" [])
- , ("friulan", \_ -> Lang "fur" "" "" [])
- , ("irish", \_ -> Lang "ga" "" "" [])
- , ("scottish", \_ -> Lang "gd" "" "" [])
- , ("ethiopic", \_ -> Lang "gez" "" "" [])
- , ("galician", \_ -> Lang "gl" "" "" [])
- , ("hebrew", \_ -> Lang "he" "" "" [])
- , ("hindi", \_ -> Lang "hi" "" "" [])
- , ("croatian", \_ -> Lang "hr" "" "" [])
- , ("magyar", \_ -> Lang "hu" "" "" [])
- , ("armenian", \_ -> Lang "hy" "" "" [])
- , ("interlingua", \_ -> Lang "ia" "" "" [])
- , ("indonesian", \_ -> Lang "id" "" "" [])
- , ("icelandic", \_ -> Lang "is" "" "" [])
- , ("italian", \_ -> Lang "it" "" "" [])
- , ("japanese", \_ -> Lang "jp" "" "" [])
- , ("khmer", \_ -> Lang "km" "" "" [])
- , ("kurmanji", \_ -> Lang "kmr" "" "" [])
- , ("kannada", \_ -> Lang "kn" "" "" [])
- , ("korean", \_ -> Lang "ko" "" "" [])
- , ("lao", \_ -> Lang "lo" "" "" [])
- , ("lithuanian", \_ -> Lang "lt" "" "" [])
- , ("latvian", \_ -> Lang "lv" "" "" [])
- , ("malayalam", \_ -> Lang "ml" "" "" [])
- , ("mongolian", \_ -> Lang "mn" "" "" [])
- , ("marathi", \_ -> Lang "mr" "" "" [])
- , ("dutch", \_ -> Lang "nl" "" "" [])
- , ("nynorsk", \_ -> Lang "nn" "" "" [])
- , ("norsk", \_ -> Lang "no" "" "" [])
- , ("nko", \_ -> Lang "nqo" "" "" [])
- , ("occitan", \_ -> Lang "oc" "" "" [])
- , ("panjabi", \_ -> Lang "pa" "" "" [])
- , ("polish", \_ -> Lang "pl" "" "" [])
- , ("piedmontese", \_ -> Lang "pms" "" "" [])
- , ("portuguese", \_ -> Lang "pt" "" "" [])
- , ("romansh", \_ -> Lang "rm" "" "" [])
- , ("romanian", \_ -> Lang "ro" "" "" [])
- , ("russian", \_ -> Lang "ru" "" "" [])
- , ("sanskrit", \_ -> Lang "sa" "" "" [])
- , ("samin", \_ -> Lang "se" "" "" [])
- , ("slovak", \_ -> Lang "sk" "" "" [])
- , ("albanian", \_ -> Lang "sq" "" "" [])
- , ("serbian", \_ -> Lang "sr" "" "" [])
- , ("swedish", \_ -> Lang "sv" "" "" [])
- , ("syriac", \_ -> Lang "syr" "" "" [])
- , ("tamil", \_ -> Lang "ta" "" "" [])
- , ("telugu", \_ -> Lang "te" "" "" [])
- , ("thai", \_ -> Lang "th" "" "" [])
- , ("turkmen", \_ -> Lang "tk" "" "" [])
- , ("turkish", \_ -> Lang "tr" "" "" [])
- , ("ukrainian", \_ -> Lang "uk" "" "" [])
- , ("urdu", \_ -> Lang "ur" "" "" [])
- , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+ _ -> Lang "la" Nothing Nothing [] [] [])
+ , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+ , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+ , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+ , ("afrikaans", \_ -> simpleLang "af")
+ , ("amharic", \_ -> simpleLang "am")
+ , ("assamese", \_ -> simpleLang "as")
+ , ("asturian", \_ -> simpleLang "ast")
+ , ("bulgarian", \_ -> simpleLang "bg")
+ , ("bengali", \_ -> simpleLang "bn")
+ , ("tibetan", \_ -> simpleLang "bo")
+ , ("breton", \_ -> simpleLang "br")
+ , ("catalan", \_ -> simpleLang "ca")
+ , ("welsh", \_ -> simpleLang "cy")
+ , ("czech", \_ -> simpleLang "cs")
+ , ("coptic", \_ -> simpleLang "cop")
+ , ("danish", \_ -> simpleLang "da")
+ , ("divehi", \_ -> simpleLang "dv")
+ , ("esperanto", \_ -> simpleLang "eo")
+ , ("spanish", \_ -> simpleLang "es")
+ , ("estonian", \_ -> simpleLang "et")
+ , ("basque", \_ -> simpleLang "eu")
+ , ("farsi", \_ -> simpleLang "fa")
+ , ("finnish", \_ -> simpleLang "fi")
+ , ("french", \_ -> simpleLang "fr")
+ , ("friulan", \_ -> simpleLang "fur")
+ , ("irish", \_ -> simpleLang "ga")
+ , ("scottish", \_ -> simpleLang "gd")
+ , ("ethiopic", \_ -> simpleLang "gez")
+ , ("galician", \_ -> simpleLang "gl")
+ , ("hebrew", \_ -> simpleLang "he")
+ , ("hindi", \_ -> simpleLang "hi")
+ , ("croatian", \_ -> simpleLang "hr")
+ , ("magyar", \_ -> simpleLang "hu")
+ , ("armenian", \_ -> simpleLang "hy")
+ , ("interlingua", \_ -> simpleLang "ia")
+ , ("indonesian", \_ -> simpleLang "id")
+ , ("icelandic", \_ -> simpleLang "is")
+ , ("italian", \_ -> simpleLang "it")
+ , ("japanese", \_ -> simpleLang "jp")
+ , ("khmer", \_ -> simpleLang "km")
+ , ("kurmanji", \_ -> simpleLang "kmr")
+ , ("kannada", \_ -> simpleLang "kn")
+ , ("korean", \_ -> simpleLang "ko")
+ , ("lao", \_ -> simpleLang "lo")
+ , ("lithuanian", \_ -> simpleLang "lt")
+ , ("latvian", \_ -> simpleLang "lv")
+ , ("malayalam", \_ -> simpleLang "ml")
+ , ("mongolian", \_ -> simpleLang "mn")
+ , ("marathi", \_ -> simpleLang "mr")
+ , ("dutch", \_ -> simpleLang "nl")
+ , ("nynorsk", \_ -> simpleLang "nn")
+ , ("norsk", \_ -> simpleLang "no")
+ , ("nko", \_ -> simpleLang "nqo")
+ , ("occitan", \_ -> simpleLang "oc")
+ , ("panjabi", \_ -> simpleLang "pa")
+ , ("polish", \_ -> simpleLang "pl")
+ , ("piedmontese", \_ -> simpleLang "pms")
+ , ("portuguese", \_ -> simpleLang "pt")
+ , ("romansh", \_ -> simpleLang "rm")
+ , ("romanian", \_ -> simpleLang "ro")
+ , ("russian", \_ -> simpleLang "ru")
+ , ("sanskrit", \_ -> simpleLang "sa")
+ , ("samin", \_ -> simpleLang "se")
+ , ("slovak", \_ -> simpleLang "sk")
+ , ("albanian", \_ -> simpleLang "sq")
+ , ("serbian", \_ -> simpleLang "sr")
+ , ("swedish", \_ -> simpleLang "sv")
+ , ("syriac", \_ -> simpleLang "syr")
+ , ("tamil", \_ -> simpleLang "ta")
+ , ("telugu", \_ -> simpleLang "te")
+ , ("thai", \_ -> simpleLang "th")
+ , ("turkmen", \_ -> simpleLang "tk")
+ , ("turkish", \_ -> simpleLang "tr")
+ , ("ukrainian", \_ -> simpleLang "uk")
+ , ("urdu", \_ -> simpleLang "ur")
+ , ("vietnamese", \_ -> simpleLang "vi")
]
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
babelLangToBCP47 :: T.Text -> Maybe Lang
babelLangToBCP47 s =
case s of
- "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
- "naustrian" -> Just $ Lang "de" "" "AT" []
- "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
- "nswissgerman" -> Just $ Lang "de" "" "CH" []
- "german" -> Just $ Lang "de" "" "DE" ["1901"]
- "ngerman" -> Just $ Lang "de" "" "DE" []
- "lowersorbian" -> Just $ Lang "dsb" "" "" []
- "uppersorbian" -> Just $ Lang "hsb" "" "" []
- "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
- "slovene" -> Just $ Lang "sl" "" "" []
- "australian" -> Just $ Lang "en" "" "AU" []
- "canadian" -> Just $ Lang "en" "" "CA" []
- "british" -> Just $ Lang "en" "" "GB" []
- "newzealand" -> Just $ Lang "en" "" "NZ" []
- "american" -> Just $ Lang "en" "" "US" []
- "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+ "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+ "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+ "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+ "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+ "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+ "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+ "slovene" -> Just $ simpleLang "sl"
+ "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+ "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+ "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+ "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+ "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+ "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
_ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..5495a8e74
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,184 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+ ( macroDef
+ )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+ (_, s) <- withRaw (commandDef <|> environmentDef)
+ (constructor (untokenize s) <$
+ guardDisabled Ext_latex_macros)
+ <|> return mempty
+ where commandDef = do
+ nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+ guardDisabled Ext_latex_macros <|>
+ mapM_ (\(name, macro') ->
+ updateState (\s -> s{ sMacros = M.insert name macro'
+ (sMacros s) })) nameMacroPairs
+ environmentDef = do
+ mbenv <- newenvironment
+ case mbenv of
+ Nothing -> return ()
+ Just (name, macro1, macro2) ->
+ guardDisabled Ext_latex_macros <|>
+ do updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
+letmacro = do
+ controlSeq "let"
+ (name, contents) <- withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ optional $ symbol '='
+ spaces
+ -- we first parse in verbatim mode, and then expand macros,
+ -- because we don't want \let\foo\bar to turn into
+ -- \let\foo hello if we have previously \def\bar{hello}
+ contents <- bracedOrToken
+ return (name, contents)
+ contents' <- doMacros' 0 contents
+ return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
+ -- we use withVerbatimMode, because macros are to be expanded
+ -- at point of use, not point of definition
+ controlSeq "def"
+ withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ argspecs <- many (argspecArg <|> argspecPattern)
+ contents <- bracedOrToken
+ return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+ controlSeq "newif"
+ withVerbatimMode $ do
+ Tok pos (CtrlSeq name) _ <- anyControlSeq
+ -- \def\iffoo\iffalse
+ -- \def\footrue{\def\iffoo\iftrue}
+ -- \def\foofalse{\def\iffoo\iffalse}
+ let base = T.drop 2 name
+ return [ (name, Macro ExpandWhenUsed [] Nothing
+ [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+ , (base <> "true",
+ Macro ExpandWhenUsed [] Nothing
+ [ Tok pos (CtrlSeq "def") "\\def"
+ , Tok pos (CtrlSeq name) ("\\" <> name)
+ , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+ ])
+ , (base <> "false",
+ Macro ExpandWhenUsed [] Nothing
+ [ Tok pos (CtrlSeq "def") "\\def"
+ , Tok pos (CtrlSeq name) ("\\" <> name)
+ , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+ ])
+ ]
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+ Tok _ (Arg i) _ <- satisfyTok isArgTok
+ return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+ Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+ (toktype' == Symbol || toktype' == Word) &&
+ (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
+newcommand = do
+ Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand" <|>
+ controlSeq "DeclareMathOperator" <|>
+ controlSeq "DeclareRobustCommand"
+ withVerbatimMode $ do
+ Tok _ (CtrlSeq name) txt <- do
+ optional (symbol '*')
+ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ let argspecs = map ArgNum [1..numargs]
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents' <- bracedOrToken
+ let contents =
+ case mtype of
+ "DeclareMathOperator" ->
+ Tok pos (CtrlSeq "mathop") "\\mathop"
+ : Tok pos Symbol "{"
+ : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+ : Tok pos Symbol "{"
+ : (contents' ++
+ [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+ _ -> contents'
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just macro
+ | mtype == "newcommand" -> do
+ report $ MacroAlreadyDefined txt pos
+ return [(name, macro)]
+ | mtype == "providecommand" -> return [(name, macro)]
+ _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ withVerbatimMode $ do
+ optional $ symbol '*'
+ spaces
+ name <- untokenize <$> braced
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
+ startcontents <- spaces >> bracedOrToken
+ endcontents <- spaces >> bracedOrToken
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _
+ | mtype == "newenvironment" -> do
+ report $ MacroAlreadyDefined name pos
+ return Nothing
+ | mtype == "provideenvironment" ->
+ return Nothing
+ _ -> return $ Just (name,
+ Macro ExpandWhenUsed argspecs optarg startcontents,
+ Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead ds of
+ Just i -> return i
+ _ -> return 0
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+ ( dollarsMath
+ , inlineEnvironments
+ , inlineEnvironment
+ , mathInline
+ , mathDisplay
+ , theoremstyle
+ , theoremEnvironment
+ , newtheorem
+ , proof
+ )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ (do contents <- try $ untokenize <$> pDollarsMath 0
+ if display
+ then mathDisplay contents <$ symbol '$'
+ else return $ mathInline contents)
+ <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+ tk@(Tok _ toktype t) <- anyTok
+ case toktype of
+ Symbol | t == "$"
+ , n == 0 -> return []
+ | t == "\\" -> do
+ tk' <- anyTok
+ (tk :) . (tk' :) <$> pDollarsMath n
+ | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+ | t == "}" ->
+ if n > 0
+ then (tk :) <$> pDollarsMath (n-1)
+ else mzero
+ _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" <> y <> "}\n" <> x <>
+ "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ , ("dmath", mathEnvWith id Nothing "dmath")
+ , ("dmath*", mathEnvWith id Nothing "dmath*")
+ , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+ , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+ , ("darray", mathEnvWith id (Just "aligned") "darray")
+ , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+ ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+ stylename <- untokenize <$> braced
+ let mbstyle = case stylename of
+ "plain" -> Just PlainStyle
+ "definition" -> Just DefinitionStyle
+ "remark" -> Just RemarkStyle
+ _ -> Nothing
+ case mbstyle of
+ Nothing -> return ()
+ Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+ return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+ number <- option True (False <$ symbol '*' <* sp)
+ name <- untokenize <$> braced
+ sp
+ series <- option Nothing $ Just . untokenize <$> bracketedToks
+ sp
+ showName <- tokWith inline
+ sp
+ syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+ sty <- sLastTheoremStyle <$> getState
+ let spec = TheoremSpec { theoremName = showName
+ , theoremStyle = sty
+ , theoremSeries = series
+ , theoremSyncTo = syncTo
+ , theoremNumber = number
+ , theoremLastNum = DottedNum [0] }
+ tmap <- sTheoremMap <$> getState
+ updateState $ \s -> s{ sTheoremMap =
+ M.insert name spec tmap }
+ return mempty
+
+theoremEnvironment :: PandocMonad m
+ => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+ tmap <- sTheoremMap <$> getState
+ case M.lookup name tmap of
+ Nothing -> mzero
+ Just tspec -> do
+ optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+ mblabel <- option Nothing $ Just . untokenize <$>
+ try (spaces >> controlSeq "label" >> spaces >> braced)
+ bs <- env name blocks
+ number <-
+ if theoremNumber tspec
+ then do
+ let name' = fromMaybe name $ theoremSeries tspec
+ num <- getNextNumber
+ (maybe (DottedNum [0]) theoremLastNum .
+ M.lookup name' . sTheoremMap)
+ updateState $ \s ->
+ s{ sTheoremMap =
+ M.adjust
+ (\spec -> spec{ theoremLastNum = num })
+ name'
+ (sTheoremMap s)
+ }
+
+ case mblabel of
+ Just ident ->
+ updateState $ \s ->
+ s{ sLabels = M.insert ident
+ (B.toList $
+ theoremName tspec <> "\160" <>
+ str (renderDottedNum num)) (sLabels s) }
+ Nothing -> return ()
+ return $ space <> B.text (renderDottedNum num)
+ else return mempty
+ let titleEmph = case theoremStyle tspec of
+ PlainStyle -> B.strong
+ DefinitionStyle -> B.strong
+ RemarkStyle -> B.emph
+ let title = titleEmph (theoremName tspec <> number)
+ <> optTitle <> "." <> space
+ return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+ $ case theoremStyle tspec of
+ PlainStyle -> walk italicize bs
+ _ -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+ title <- option (B.text "Proof") opt
+ bs <- env "proof" blocks
+ return $
+ B.divWith ("", ["proof"], []) $
+ addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+ case B.toList bs of
+ (Para xs : rest)
+ -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+ _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+ case Seq.viewr (B.unMany bs) of
+ s Seq.:> Para ils
+ -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+ _ -> bs <> B.para qedSign
+ where
+ qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,11 +1,12 @@
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Parsing
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawLaTeXParser
, applyMacros
, tokenize
+ , tokenizeSources
+ , getInputTokens
, untokenize
, untoken
, totoks
, toksToString
, satisfyTok
+ , parseFromToks
+ , disablingWithRaw
, doMacros
, doMacros'
, setpos
@@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, comment
, anyTok
, singleChar
+ , tokWith
, specialChars
, endline
, blankline
@@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawopt
, overlaySpecification
, getNextNumber
+ , label
+ , setCaption
+ , resetCaption
+ , env
+ , addMeta
) where
import Control.Applicative (many, (<|>))
@@ -87,13 +98,15 @@ import Control.Monad.Trans (lift)
import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
import Data.Default
import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+ (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Shared
import Text.Parsec.Pos
--- import Debug.Trace
newtype DottedNum = DottedNum [Int]
deriving (Show, Eq)
@@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sLabels :: M.Map Text [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map Text Bool
- , sExpanded :: Bool
+ , sFileContents :: M.Map Text Text
+ , sEnableWithRaw :: Bool
+ , sRawTokens :: IntMap.IntMap [Tok]
}
deriving Show
@@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions = def
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
- , sExpanded = False
+ , sFileContents = M.empty
+ , sEnableWithRaw = True
+ , sRawTokens = IntMap.empty
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -232,21 +248,25 @@ withVerbatimMode parser = do
updateState $ \st -> st{ sVerbatimMode = False }
return result
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT Text s m (a, Text)
+ -> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
let lstate' = lstate { sMacros = extractMacros pstate }
+ let setStartPos = case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
+ let preparser = setStartPos >> parser
let rawparser = (,) <$> withRaw valParser <*> getState
- res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+ lstate "chunk" toks
case res' of
Left _ -> mzero
- Right toks' -> do
+ Right (endpos, toks') -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
- doMacros
ts <- many (satisfyTok (const True))
setInput ts
rawparser)
@@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
+ let skipTilPos stopPos = do
+ anyChar
+ pos <- getPosition
+ if pos >= stopPos
+ then return ()
+ else skipTilPos stopPos
+ skipTilPos endpos
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do
return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => Text -> ParserT Text s m Text
+ => Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
@@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
Left e -> Prelude.fail (show e)
Right s' -> return s'
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+> let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+ tokenizeSource (pos, t) = totoks pos t
+
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+ pos <- getPosition
+ ss <- getInput
+ return $
+ case ss of
+ Sources [] -> []
+ Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty
toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+ oldInput <- getInput
+ setInput toks
+ oldpos <- getPosition
+ case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
+ result <- disablingWithRaw parser
+ setInput oldInput
+ setPosition oldpos
+ return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+ oldEnableWithRaw <- sEnableWithRaw <$> getState
+ updateState $ \st -> st{ sEnableWithRaw = False }
+ result <- parser
+ updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+ return result
+
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
satisfyTok f = do
doMacros -- apply macros on remaining input stream
res <- tokenPrim (T.unpack . untoken) updatePos matcher
- updateState $ \st -> st{ sExpanded = False }
- return res
+ updateState $ \st ->
+ if sEnableWithRaw st
+ then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+ else st
+ return $! res
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
updatePos _spos _ (Tok pos _ _ : _) = pos
- updatePos spos _ [] = incSourceColumn spos 1
+ updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t)
doMacros :: PandocMonad m => LP m ()
doMacros = do
- expanded <- sExpanded <$> getState
- verbatimMode <- sVerbatimMode <$> getState
- unless (expanded || verbatimMode) $ do
- getInput >>= doMacros' 1 >>= setInput
- updateState $ \st -> st{ sExpanded = True }
+ st <- getState
+ unless (sVerbatimMode st) $
+ getInput >>= doMacros' 1 >>= setInput
doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
doMacros' n inp =
case inp of
Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros n spos name ts
+ -> handleMacros n spos name ts <|> return inp
Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros n spos ("end" <> name) ts
+ -> handleMacros n spos ("end" <> name) ts <|> return inp
Tok _ (CtrlSeq "expandafter") _ : t : ts
-> combineTok t <$> doMacros' n ts
Tok spos (CtrlSeq name) _ : ts
- -> handleMacros n spos name ts
+ -> handleMacros n spos name ts <|> return inp
_ -> return inp
- <|> return inp
where
combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -482,7 +554,7 @@ doMacros' n inp =
$ throwError $ PandocMacroLoop name
macros <- sMacros <$> getState
case M.lookup name macros of
- Nothing -> mzero
+ Nothing -> trySpecialMacro name ts
Just (Macro expansionPoint argspecs optarg newtoks) -> do
let getargs' = do
args <-
@@ -510,6 +582,41 @@ doMacros' n inp =
ExpandWhenUsed -> doMacros' (n' + 1) result
ExpandWhenDefined -> return result
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+ ts' <- doMacros' 1 ts
+ case ts' of
+ Tok pos Word t : _
+ | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+ _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
+trySpecialMacro _ _ = mzero
+
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+ res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+ case res' of
+ Left _ -> Prelude.fail "Could not parse conditional"
+ Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+ ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+ *> anyTok)
+ elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+ <|> ([] <$ controlSeq "fi")
+ rest <- getInput
+ return $ (if b then ifToks else elseToks) ++ rest
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+ case T.uncons t of
+ Just (c, _) | isAlphaNum c -> True
+ _ -> False
+
setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
@@ -592,18 +699,22 @@ isCommentTok _ = False
anyTok :: PandocMonad m => LP m Tok
anyTok = satisfyTok (const True)
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+ satisfyTok $ \case
+ Tok _ Word t -> T.length t == 1
+ Tok _ Symbol t -> not (T.any (`Set.member` specialChars) t)
+ _ -> False
+
singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
- Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
- guard $ not $ toktype == Symbol &&
- T.any (`Set.member` specialChars) t
- if T.length t > 1
- then do
- let (t1, t2) = (T.take 1 t, T.drop 1 t)
- inp <- getInput
- setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
- return $ Tok pos toktype t1
- else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+ singleCharFromWord = do
+ Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+ anyTok
specialChars :: Set.Set Char
specialChars = Set.fromList "#$%&~_^\\{}"
@@ -646,28 +757,25 @@ grouped parser = try $ do
-- {{a,b}} should be parsed the same as {a,b}
try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
- handleEgroup <|> handleBgroup <|> handleOther
- where handleEgroup = do
- t <- symbol '}'
- if n == 1
- then return []
- else (t:) <$> braced' getTok (n - 1)
- handleBgroup = do
- t <- symbol '{'
- (t:) <$> braced' getTok (n + 1)
- handleOther = do
- t <- getTok
- (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+ go n = do
+ t <- getTok
+ case t of
+ Tok _ Symbol "}"
+ | n > 1 -> (t:) <$> go (n - 1)
+ | otherwise -> return []
+ Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+ _ -> (t:) <$> go n
braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
-- URLs require special handling, because they can contain %
-- characters. So we retonenize comments as we go...
bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
-- For handling URLs, which allow literal % characters...
retokenizeComment :: PandocMonad m => LP m ()
@@ -723,16 +831,29 @@ ignore raw = do
withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
withRaw parser = do
- inp <- getInput
+ rawTokensMap <- sRawTokens <$> getState
+ let key = case IntMap.lookupMax rawTokensMap of
+ Nothing -> 0
+ Just (n,_) -> n + 1
+ -- insert empty list at key
+ updateState $ \st -> st{ sRawTokens =
+ IntMap.insert key [] $ sRawTokens st }
result <- parser
- nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
- let raw = takeWhile (\(Tok pos _ _) -> maybe True
- (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+ mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+ raw <- case mbRevToks of
+ Just revtoks -> do
+ updateState $ \st -> st{ sRawTokens =
+ IntMap.delete key $ sRawTokens st}
+ return $ reverse revtoks
+ Nothing ->
+ throwError $ PandocShouldNeverHappenError $
+ "sRawTokens has nothing at key " <> T.pack (show key)
return (result, raw)
keyval :: PandocMonad m => LP m (Text, Text)
keyval = try $ do
- Tok _ Word key <- satisfyTok isWordTok
+ key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+ (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
sp
val <- option mempty $ do
symbol '='
@@ -792,7 +913,7 @@ getRawCommand name txt = do
(_, rawargs) <- withRaw $
case name of
"write" -> do
- void $ satisfyTok isWordTok -- digits
+ void $ many $ satisfyTok isDigitTok -- digits
void braced
"titleformat" -> do
void braced
@@ -807,6 +928,10 @@ getRawCommand name txt = do
void $ many braced
return $ txt <> untokenize rawargs
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _ = False
+
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany (void overlaySpecification <|> void rawopt)
@@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do
Just n -> [n, 1]
Nothing -> [1]
+label :: PandocMonad m => LP m ()
+label = do
+ controlSeq "label"
+ t <- braced
+ updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+ skipopts
+ ils <- tokWith inline
+ optional $ try $ spaces *> label
+ updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing
+ , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+ grouped inlineParser
+ <|> (lookAhead anyControlSeq >> inlineParser)
+ <|> singleChar'
+ where singleChar' = do
+ Tok _ _ t <- singleChar
+ return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,7 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.LaTeX.SIunitx
- ( dosi
- , doSI
- , doSIrange
- , doSInum
- , doSInumlist
- , doSIang
- )
+ ( siunitxCommands )
where
import Text.Pandoc.Builder
import Text.Pandoc.Readers.LaTeX.Parsing
@@ -15,14 +10,32 @@ import Text.Pandoc.Class
import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Control.Applicative ((<|>))
+import Control.Monad (void)
import qualified Data.Map as M
import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (intersperse)
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
+
+siunitxCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+ [ ("si", dosi tok)
+ , ("SI", doSI tok)
+ , ("SIrange", doSIrange True tok)
+ , ("numrange", doSIrange False tok)
+ , ("numlist", doSInumlist)
+ , ("SIlist", doSIlist tok)
+ , ("num", doSInum)
+ , ("ang", doSIang)
+ ]
dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+ options <- option [] keyvals
+ grouped (siUnit options tok) <|> siUnit options tok
-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -57,23 +70,50 @@ doSInumlist = do
mconcat (intersperse (str "," <> space) (init xs)) <>
text ", & " <> last xs
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+ options <- option [] keyvals
+ nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+ unit <- grouped (siUnit options tok) <|> siUnit options tok
+ let xs = map (<> (str "\xa0" <> unit)) nums
+ case xs of
+ [] -> return mempty
+ [x] -> return x
+ _ -> return $
+ mconcat (intersperse (str "," <> space) (init xs)) <>
+ text ", & " <> last xs
+
parseNum :: Parser Text () Inlines
parseNum = (mconcat <$> many parseNumPart) <* eof
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
parseNumPart :: Parser Text () Inlines
parseNumPart =
parseDecimalNum <|>
parseComma <|>
parsePlusMinus <|>
+ parsePM <|>
parseI <|>
parseExp <|>
parseX <|>
parseSpace
where
- parseDecimalNum = do
- pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
- basenum <- (pref <>) . T.pack
- <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+ parseDecimalNum, parsePlusMinus, parsePM,
+ parseComma, parseI, parseX,
+ parseExp, parseSpace :: Parser Text () Inlines
+ parseDecimalNum = try $ do
+ pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
+ basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+ let basenum = pref <> T.pack
+ (case basenum' of
+ '.':_ -> '0':basenum'
+ _ -> basenum')
uncertainty <- option mempty $ T.pack <$> parseParens
if T.null uncertainty
then return $ str basenum
@@ -91,6 +131,7 @@ parseNumPart =
| otherwise -> "." <> t
parseComma = str "." <$ char ','
parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+ parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
parseParens =
char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
parseI = str "i" <$ char 'i'
@@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines
doSIang = do
skipopts
ps <- T.splitOn ";" . untokenize <$> braced
+ let dropPlus t = case T.uncons t of
+ Just ('+',t') -> t'
+ _ -> t
case ps ++ repeat "" of
(d:m:s:_) -> return $
- (if T.null d then mempty else str d <> str "\xb0") <>
- (if T.null m then mempty else str m <> str "\x2032") <>
- (if T.null s then mempty else str s <> str "\x2033")
+ (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+ (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+ (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
_ -> return mempty
-- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
@@ -136,40 +180,99 @@ doSIrange includeUnits tok = do
emptyOr160 :: Inlines -> Inlines
emptyOr160 x = if x == mempty then x else str "\160"
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- case name of
- "square" -> do
- unit <- siUnit tok
- return $ unit <> superscript "2"
- "cubic" -> do
- unit <- siUnit tok
- return $ unit <> superscript "3"
- "raisetothe" -> do
- n <- tok
- unit <- siUnit tok
- return $ unit <> superscript n
- _ ->
- case M.lookup name siUnitMap of
- Just il ->
- option il $
- choice
- [ (il <> superscript "2") <$ controlSeq "squared"
- , (il <> superscript "3") <$ controlSeq "cubed"
- , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
- ]
- Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
- return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
- return (str t))
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
+ where
+ siUnitPart :: LP m Inlines
+ siUnitPart = try $ do
+ skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+ x <- ((siPrefix <*> siBase)
+ <|> (do u <- siBase <|> tok
+ option u $ siSuffix <*> pure u))
+ option x (siInfix x)
+ siInfix :: Inlines -> LP m Inlines
+ siInfix u1 = try $
+ (do _ <- controlSeq "per"
+ u2 <- siUnitPart
+ let useSlash = lookup "per-mode" options == Just "symbol"
+ if useSlash
+ then return (u1 <> str "/" <> u2)
+ else return (u1 <> str "\xa0" <> negateExponent u2))
+ <|> (do _ <- symbol '/'
+ u2 <- siUnitPart
+ return (u1 <> str "/" <> u2))
+ siPrefix :: LP m (Inlines -> Inlines)
+ siPrefix =
+ (do _ <- controlSeq "square"
+ skipopts
+ return (<> superscript "2"))
+ <|> (do _ <- controlSeq "cubic"
+ skipopts
+ return (<> superscript "3"))
+ <|> (do _ <- controlSeq "raisetothe"
+ skipopts
+ n <- walk hyphenToMinus <$> tok
+ return (<> superscript n))
+ siSuffix :: LP m (Inlines -> Inlines)
+ siSuffix =
+ (do _ <- controlSeq "squared"
+ skipopts
+ return (<> superscript "2"))
+ <|> (do _ <- controlSeq "cubed"
+ skipopts
+ return (<> superscript "3"))
+ <|> (do _ <- controlSeq "tothe"
+ skipopts
+ n <- walk hyphenToMinus <$> tok
+ return (<> superscript n))
+ <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> superscript n)))
+ <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> subscript n)))
+ negateExponent :: Inlines -> Inlines
+ negateExponent ils =
+ case Seq.viewr (unMany ils) of
+ xs Seq.:> Superscript ss -> (Many xs) <>
+ superscript (str minus <> fromList ss)
+ _ -> ils <> superscript (str (minus <> "1"))
+ siBase :: LP m Inlines
+ siBase =
+ ((try
+ (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+ case M.lookup name siUnitModifierMap of
+ Just il -> (il <>) <$> siBase
+ Nothing ->
+ case M.lookup name siUnitMap of
+ Just il -> pure il
+ Nothing -> fail "not a unit command"))
+ <|> (do Tok _ Word t <- satisfyTok isWordTok
+ return $ str t)
+ )
+
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+ [ ("atto", str "a")
+ , ("centi", str "c")
+ , ("deca", str "d")
+ , ("deci", str "d")
+ , ("deka", str "d")
+ , ("exa", str "E")
+ , ("femto", str "f")
+ , ("giga", str "G")
+ , ("hecto", str "h")
+ , ("kilo", str "k")
+ , ("mega", str "M")
+ , ("micro", str "μ")
+ , ("milli", str "m")
+ , ("nano", str "n")
+ , ("peta", str "P")
+ , ("pico", str "p")
+ , ("tera", str "T")
+ , ("yocto", str "y")
+ , ("yotta", str "Y")
+ , ("zepto", str "z")
+ , ("zetta", str "Z")
+ ]
siUnitMap :: M.Map Text Inlines
siUnitMap = M.fromList
@@ -269,7 +372,6 @@ siUnitMap = M.fromList
, ("arcsecond", str "″")
, ("astronomicalunit", str "ua")
, ("atomicmassunit", str "u")
- , ("atto", str "a")
, ("bar", str "bar")
, ("barn", str "b")
, ("becquerel", str "Bq")
@@ -277,51 +379,38 @@ siUnitMap = M.fromList
, ("bohr", emph (str "a") <> subscript (str "0"))
, ("candela", str "cd")
, ("celsius", str "°C")
- , ("centi", str "c")
, ("clight", emph (str "c") <> subscript (str "0"))
, ("coulomb", str "C")
, ("dalton", str "Da")
, ("day", str "d")
- , ("deca", str "d")
- , ("deci", str "d")
, ("decibel", str "db")
, ("degreeCelsius",str "°C")
, ("degree", str "°")
- , ("deka", str "d")
, ("electronmass", emph (str "m") <> subscript (str "e"))
, ("electronvolt", str "eV")
, ("elementarycharge", emph (str "e"))
- , ("exa", str "E")
, ("farad", str "F")
- , ("femto", str "f")
- , ("giga", str "G")
, ("gram", str "g")
, ("gray", str "Gy")
, ("hartree", emph (str "E") <> subscript (str "h"))
, ("hectare", str "ha")
- , ("hecto", str "h")
, ("henry", str "H")
, ("hertz", str "Hz")
, ("hour", str "h")
, ("joule", str "J")
, ("katal", str "kat")
, ("kelvin", str "K")
- , ("kilo", str "k")
, ("kilogram", str "kg")
, ("knot", str "kn")
, ("liter", str "L")
, ("litre", str "l")
, ("lumen", str "lm")
, ("lux", str "lx")
- , ("mega", str "M")
, ("meter", str "m")
, ("metre", str "m")
- , ("micro", str "μ")
- , ("milli", str "m")
, ("minute", str "min")
, ("mmHg", str "mmHg")
, ("mole", str "mol")
- , ("nano", str "n")
, ("nauticalmile", str "M")
, ("neper", str "Np")
, ("newton", str "N")
@@ -329,25 +418,17 @@ siUnitMap = M.fromList
, ("Pa", str "Pa")
, ("pascal", str "Pa")
, ("percent", str "%")
- , ("per", str "/")
- , ("peta", str "P")
- , ("pico", str "p")
, ("planckbar", emph (str "\x210f"))
, ("radian", str "rad")
, ("second", str "s")
, ("siemens", str "S")
, ("sievert", str "Sv")
, ("steradian", str "sr")
- , ("tera", str "T")
, ("tesla", str "T")
, ("tonne", str "t")
, ("volt", str "V")
, ("watt", str "W")
, ("weber", str "Wb")
- , ("yocto", str "y")
- , ("yotta", str "Y")
- , ("zepto", str "z")
- , ("zetta", str "Z")
]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..f56728fe1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,379 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+ ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+ M.fromList
+ [ ("longtable", env "longtable" $
+ resetCaption *>
+ simpTable blocks inline "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ skipopts *> resetCaption *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False)
+ ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ (controlSeq "cline" <* braced) <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional rawopt
+ return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+ <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) ->
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+ _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+ let maybeBar = skipMany
+ (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
+ let colWidth = try $ do
+ symbol '{'
+ ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+ spaces
+ symbol '}'
+ return $ safeRead ds
+ let alignSpec = do
+ pref <- option [] alignPrefix
+ spaces
+ al <- alignChar
+ width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+ pos <- getPosition
+ report $ SkippedContent s pos
+ return Nothing)
+ spaces
+ suff <- option [] alignSuffix
+ return (al, width, (pref, suff))
+ let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+ symbol '*'
+ spaces
+ ds <- trim . untokenize <$> braced
+ spaces
+ spec <- braced
+ case safeRead ds of
+ Just n ->
+ getInput >>= setInput . (mconcat (replicate n spec) ++)
+ Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many $ try $ spaces >> optional starAlign >>
+ (alignSpec <* maybeBar)
+ spaces
+ egroup
+ spaces
+ return $ map toSpec aligns'
+ where
+ toColWidth (Just w) | w > 0 = ColWidth w
+ toColWidth _ = ColWidthDefault
+ toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+ => LP m Blocks -- ^ block parser
+ -> LP m Inlines -- ^ inline parser
+ -> Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
+ -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
+ -- contexts that can contain & that is not colsep:
+ let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+ canContainAmp (Tok _ (CtrlSeq "verb") _) = True
+ canContainAmp (Tok _ (CtrlSeq "Verb") _) = True
+ canContainAmp _ = False
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- mconcat <$>
+ many ( snd <$> withRaw
+ ((lookAhead (controlSeq "parbox") >>
+ void blocks) -- #5711
+ <|>
+ (lookAhead (satisfyTok canContainAmp) >> void inline)
+ <|>
+ (lookAhead (symbol '$') >> void inline))
+ <|>
+ (do notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ count 1 anyTok) )
+
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+ rawcells <- mapM celltoks prefsufs
+ cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+ spaces
+ return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+ spaces
+ updateState $ \st -> st{ sInTableCell = True }
+ cell' <- multicolumnCell blocks
+ <|> multirowCell blocks
+ <|> parseSimpleCell
+ <|> parseEmptyCell
+ updateState $ \st -> st{ sInTableCell = False }
+ spaces
+ return cell'
+ where
+ -- The parsing of empty cells is important in LaTeX, especially when dealing
+ -- with multirow/multicolumn. See #6603.
+ parseEmptyCell = spaces $> emptyCell
+ parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+ where
+ alignment = do
+ c <- untoken <$> singleChar
+ return $ case c of
+ "l" -> AlignLeft
+ "r" -> AlignRight
+ "c" -> AlignCenter
+ "*" -> AlignDefault
+ _ -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+ -- Full prototype for \multirow macro is:
+ -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+ -- However, everything except `nrows` and `text` make
+ -- sense in the context of the Pandoc AST
+ _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
+ nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
+ _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
+ content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+ return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+ span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+ let singleCell = do
+ content <- plainify <$> blocks
+ return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+ -- Two possible contents: either a \multirow cell, or content.
+ -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+ -- Note that a \multirow cell can be nested in a \multicolumn,
+ -- but not the other way around. See #6603
+ let nestedCell = do
+ (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+ return $ cell
+ alignment
+ (RowSpan rs)
+ (ColSpan span')
+ (fromList bs)
+
+ symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+ where
+ rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+ = TableBody attr rhc th' tb'
+ where
+ th' = fixTableRows th
+ tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+ where
+ fixTableRows' oldHang (Row attr cells : rs)
+ = let (newHang, cells') = fixTableRow oldHang cells
+ rs' = fixTableRows' newHang rs
+ in Row attr cells' : rs'
+ fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+ -- If there's overhang, drop cells until their total width meets the
+ -- width of the occupied grid spaces (or we run out)
+ | (n, prefHang, restHang) <- splitHang oldHang
+ , n > 0
+ = let cells' = dropToWidth getCellW n cells
+ (restHang', cells'') = fixTableRow restHang cells'
+ in (prefHang restHang', cells'')
+ -- Otherwise record the overhang of a pending cell and fix the rest
+ -- of the row
+ | c@(Cell _ _ h w _):cells' <- cells
+ = let h' = max 1 h
+ w' = max 1 w
+ oldHang' = dropToWidth getHangW w' oldHang
+ (newHang, cells'') = fixTableRow oldHang' cells'
+ in (toHang w' h' <> newHang, c : cells'')
+ | otherwise
+ = (oldHang, [])
+ where
+ getCellW (Cell _ _ _ w _) = w
+ getHangW = maybe 1 fst
+ getCS (ColSpan n) = n
+
+ toHang c r
+ | r > 1 = [Just (c, r)]
+ | otherwise = replicate (getCS c) Nothing
+
+ -- Take the prefix of the overhang list representing filled grid
+ -- spaces. Also return the remainder and the length of this prefix.
+ splitHang = splitHang' 0 id
+
+ splitHang' !n l (Just (c, r):xs)
+ = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+ splitHang' n l xs = (n, l, xs)
+
+ -- Drop list items until the total width of the dropped items
+ -- exceeds the passed width.
+ dropToWidth _ n l | n < 1 = l
+ dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
+ dropToWidth _ _ [] = []
+
+simpTable :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> Text
+ -> Bool
+ -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ tokWith inline
+ skipopts
+ colspecs <- parseAligns
+ let (aligns, widths, prefsufs) = unzip3 colspecs
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ skipMany hline
+ spaces
+ header' <- option [] . try . fmap (:[]) $
+ parseTableRow blocks inline envname prefsufs <*
+ lbreak <* many1 hline
+ spaces
+ rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+ (lbreak <* optional (skipMany hline))
+ spaces
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ lookAhead $ controlSeq "end" -- make sure we're at end
+ let th = fixTableHead $ TableHead nullAttr header'
+ let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+ let tf = TableFoot nullAttr []
+ return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table attr c spec th tb tf) = do
+ st <- getState
+ let mblabel = sLastLabel st
+ capt <- case (sCaption st, mblabel) of
+ (Just ils, Nothing) -> return $ caption Nothing (plain ils)
+ (Just ils, Just lab) -> do
+ num <- getNextNumber sLastTableNum
+ setState
+ st{ sLastTableNum = num
+ , sLabels = M.insert lab
+ [Str (renderDottedNum num)]
+ (sLabels st) }
+ return $ caption Nothing (plain ils) -- add number??
+ (Nothing, _) -> return c
+ let attr' = case (attr, mblabel) of
+ ((_,classes,kvs), Just ident) ->
+ (ident,classes,kvs)
+ _ -> attr
+ return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+ go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b = Div attr [b]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,7 @@
+{-# LANGUAGE FlexibleInstances #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Types
- Copyright : Copyright (C) 2017-2020 John MacFarlane
+ Copyright : Copyright (C) 2017-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
)
where
import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
+instance ToSources [Tok] where
+ toSources = Sources
+ . map (\ts -> case ts of
+ Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+ _ -> error "toSources [Tok] encountered empty group")
+ . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)