diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX')
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Inline.hs | 397 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Lang.hs | 321 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Macro.hs | 184 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 273 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 223 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Table.hs | 379 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Types.hs | 17 | 
9 files changed, 1973 insertions, 252 deletions
| diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs new file mode 100644 index 000000000..af97125c6 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -0,0 +1,210 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Citation +  ( citationCommands +  , cites +  ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Control.Applicative ((<|>), optional, many) +import Control.Monad (mzero) +import Control.Monad.Trans (lift) +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(PandocParsecError)) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, +                            optional, space, spaces, withRaw, (<|>)) + +citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +citationCommands inline = +  let citation = citationWith inline +      tok = spaces *> grouped inline +   in M.fromList +  [ ("cite", citation "cite" NormalCitation False) +  , ("Cite", citation "Cite" NormalCitation False) +  , ("citep", citation "citep" NormalCitation False) +  , ("citep*", citation "citep*" NormalCitation False) +  , ("citeal", citation "citeal" NormalCitation False) +  , ("citealp", citation "citealp" NormalCitation False) +  , ("citealp*", citation "citealp*" NormalCitation False) +  , ("autocite", citation "autocite" NormalCitation False) +  , ("smartcite", citation "smartcite" NormalCitation False) +  , ("footcite", inNote <$> citation "footcite" NormalCitation False) +  , ("parencite", citation "parencite" NormalCitation False) +  , ("supercite", citation "supercite" NormalCitation False) +  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) +  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) +  , ("citeyear", citation "citeyear" SuppressAuthor False) +  , ("autocite*", citation "autocite*" SuppressAuthor False) +  , ("cite*", citation "cite*" SuppressAuthor False) +  , ("parencite*", citation "parencite*" SuppressAuthor False) +  , ("textcite", citation "textcite" AuthorInText False) +  , ("citet", citation "citet" AuthorInText False) +  , ("citet*", citation "citet*" AuthorInText False) +  , ("citealt", citation "citealt" AuthorInText False) +  , ("citealt*", citation "citealt*" AuthorInText False) +  , ("textcites", citation "textcites" AuthorInText True) +  , ("cites", citation "cites" NormalCitation True) +  , ("autocites", citation "autocites" NormalCitation True) +  , ("footcites", inNote <$> citation "footcites" NormalCitation True) +  , ("parencites", citation "parencites" NormalCitation True) +  , ("supercites", citation "supercites" NormalCitation True) +  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) +  , ("Autocite", citation "Autocite" NormalCitation False) +  , ("Smartcite", citation "Smartcite" NormalCitation False) +  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False) +  , ("Parencite", citation "Parencite" NormalCitation False) +  , ("Supercite", citation "Supercite" NormalCitation False) +  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) +  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) +  , ("Citeyear", citation "Citeyear" SuppressAuthor False) +  , ("Autocite*", citation "Autocite*" SuppressAuthor False) +  , ("Cite*", citation "Cite*" SuppressAuthor False) +  , ("Parencite*", citation "Parencite*" SuppressAuthor False) +  , ("Textcite", citation "Textcite" AuthorInText False) +  , ("Textcites", citation "Textcites" AuthorInText True) +  , ("Cites", citation "Cites" NormalCitation True) +  , ("Autocites", citation "Autocites" NormalCitation True) +  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True) +  , ("Parencites", citation "Parencites" NormalCitation True) +  , ("Supercites", citation "Supercites" NormalCitation True) +  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) +  , ("citetext", complexNatbibCitation inline NormalCitation) +  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *> +                        complexNatbibCitation inline AuthorInText) +                   <|> citation "citeauthor" AuthorInText False) +  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= +                          addMeta "nocite")) +  ] + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _      = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = +  let k = last ks +  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation] +simpleCiteArgs inline = try $ do +  first  <- optionMaybe $ toList <$> opt +  second <- optionMaybe $ toList <$> opt +  keys <- try $ bgroup *> manyTill citationLabel egroup +  let (pre, suf) = case (first  , second ) of +        (Just s , Nothing) -> (mempty, s ) +        (Just s , Just t ) -> (s , t ) +        _                  -> (mempty, mempty) +      conv k = Citation { citationId      = k +                        , citationPrefix  = [] +                        , citationSuffix  = [] +                        , citationMode    = NormalCitation +                        , citationHash    = 0 +                        , citationNoteNum = 0 +                        } +  return $ addPrefix pre $ addSuffix suf $ map conv keys + where +  opt :: PandocMonad m => LP m Inlines +  opt = do +    toks <- try (sp *> bracketedToks <* sp) +    -- now parse the toks as inlines +    st <- getState +    parsed <- lift $ +      runParserT (mconcat <$> many inline) st "bracketed option" toks +    case parsed of +      Right result -> return result +      Left e       -> throwError $ PandocParsecError (toSources toks) e + + + +citationLabel :: PandocMonad m => LP m Text +citationLabel  = do +  sp +  untokenize <$> +    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) +          <* sp +          <* optional (symbol ',') +          <* sp) +  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char] + +cites :: PandocMonad m +      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation] +cites inline mode multi = try $ do +  let paropt = parenWrapped inline +  cits <- if multi +             then do +               multiprenote <- optionMaybe $ toList <$> paropt +               multipostnote <- optionMaybe $ toList <$> paropt +               let (pre, suf) = case (multiprenote, multipostnote) of +                     (Just s , Nothing) -> (mempty, s) +                     (Nothing , Just t) -> (mempty, t) +                     (Just s , Just t ) -> (s, t) +                     _                  -> (mempty, mempty) +               tempCits <- many1 $ simpleCiteArgs inline +               case tempCits of +                 (k:ks) -> case ks of +                             (_:_) -> return $ (addMprenote pre k : init ks) ++ +                                                 [addMpostnote suf (last ks)] +                             _ -> return [addMprenote pre (addMpostnote suf k)] +                 _ -> return [[]] +             else count 1 $ simpleCiteArgs inline +  let cs = concat cits +  return $ case mode of +        AuthorInText -> case cs of +                             (c:rest) -> c {citationMode = mode} : rest +                             []       -> [] +        _            -> map (\a -> a {citationMode = mode}) cs +  where mprenote (k:ks) = (k:ks) ++ [Space] +        mprenote _ = mempty +        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) +        mpostnote _ = mempty +        addMprenote mpn (k:ks) = +          let mpnfinal = case citationPrefix k of +                           (_:_) -> mprenote mpn +                           _ -> mpn +          in addPrefix mpnfinal (k:ks) +        addMprenote _ _ = [] +        addMpostnote = addSuffix . mpostnote + +citationWith :: PandocMonad m +             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines +citationWith inline name mode multi = do +  (c,raw) <- withRaw $ cites inline mode multi +  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = +  let isCite Cite{} = True +      isCite _      = False +      (pref, rest) = break isCite (toList ils) +  in case rest of +          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs +          _                -> [] + +complexNatbibCitation :: PandocMonad m +                      => LP m Inlines -> CitationMode -> LP m Inlines +complexNatbibCitation inline mode = try $ do +  (cs, raw) <- +    withRaw $ concat <$> do +      bgroup +      items <- mconcat <$> +                many1 (notFollowedBy (symbol ';') >> inline) +                  `sepBy1` symbol ';' +      egroup +      return $ map handleCitationPart items +  case cs of +       []       -> mzero +       (c:cits) -> return $ cite (c{ citationMode = mode }:cits) +                      (rawInline "latex" $ "\\citetext" <> untokenize raw) + +inNote :: Inlines -> Inlines +inNote ils = +  note $ para $ ils <> str "." + diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs new file mode 100644 index 000000000..7b8bca4af --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -0,0 +1,397 @@ +{-# LANGUAGE OverloadedStrings     #-} +{-# LANGUAGE ViewPatterns          #-} +{- | +   Module      : Text.Pandoc.Readers.LaTeX.Inline +   Copyright   : Copyright (C) 2006-2021 John MacFarlane +   License     : GNU GPL, version 2 or above + +   Maintainer  : John MacFarlane <jgm@berkeley.edu> +   Stability   : alpha +   Portability : portable +-} +module Text.Pandoc.Readers.LaTeX.Inline +  ( acronymCommands +  , verbCommands +  , charCommands +  , accentCommands +  , nameCommands +  , biblatexInlineCommands +  , refCommands +  , rawInlineOr +  , listingsLanguage +  ) +where + +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder +import Text.Pandoc.Shared (toRomanNumeral, safeRead) +import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) +import Control.Applicative (optional, (<|>)) +import Control.Monad (guard, mzero, mplus, unless) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) +import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, +                            manyTill, getInput, setInput, incSourceColumn, +                            option, many1, try) +import Data.Char (isDigit) +import Text.Pandoc.Highlighting (fromListingsLanguage,) +import Data.Maybe (maybeToList, fromMaybe) +import Text.Pandoc.Options (ReaderOptions(..)) +import qualified Data.Text.Normalize as Normalize +import qualified Text.Pandoc.Translations as Translations + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do +  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions +  if parseRaw +     then rawInline "latex" <$> getRawCommand name' ("\\" <> name') +     else fallback + +dolabel :: PandocMonad m => LP m Inlines +dolabel = do +  v <- braced +  let refstr = untokenize v +  updateState $ \st -> +    st{ sLastLabel = Just refstr } +  return $ spanWith (refstr,[],[("label", refstr)]) +    $ inBrackets $ str $ untokenize v + +doref :: PandocMonad m => Text -> LP m Inlines +doref cls = do +  v <- braced +  let refstr = untokenize v +  return $ linkWith ("",[],[ ("reference-type", cls) +                           , ("reference", refstr)]) +                    ("#" <> refstr) +                    "" +                    (inBrackets $ str refstr) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +doTerm :: PandocMonad m => Translations.Term -> LP m Inlines +doTerm term = str <$> translateTerm term + +lit :: Text -> LP m Inlines +lit = pure . str + +doverb :: PandocMonad m => LP m Inlines +doverb = do +  Tok _ Symbol t <- anySymbol +  marker <- case T.uncons t of +              Just (c, ts) | T.null ts -> return c +              _            -> mzero +  withVerbatimMode $ +    code . untokenize <$> +      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do +  t@(Tok pos toktype txt) <- anyTok +  case T.findIndex (== stopchar) txt of +       Nothing -> return t +       Just i  -> do +         let (t1, t2) = T.splitAt i txt +         inp <- getInput +         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) +                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp +         return $ Tok pos toktype t1 + +listingsLanguage :: [(Text, Text)] -> Maybe Text +listingsLanguage opts = +  case lookup "language" opts of +    Nothing  -> Nothing +    Just l   -> fromListingsLanguage l `mplus` Just l + +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do +  options <- option [] keyvals +  let classes = maybeToList $ listingsLanguage options +  doinlinecode classes + +domintinline :: PandocMonad m => LP m Inlines +domintinline = do +  skipopts +  cls <- untokenize <$> braced +  doinlinecode [cls] + +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines +doinlinecode classes = do +  Tok _ Symbol t <- anySymbol +  marker <- case T.uncons t of +              Just (c, ts) | T.null ts -> return c +              _            -> mzero +  let stopchar = if marker == '{' then '}' else marker +  withVerbatimMode $ +    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$> +      manyTill (verbTok stopchar) (symbol stopchar) + +nlToSpace :: Char -> Char +nlToSpace '\n' = ' ' +nlToSpace x    = x + +romanNumeralUpper :: (PandocMonad m) => LP m Inlines +romanNumeralUpper = +  str . toRomanNumeral <$> romanNumeralArg + +romanNumeralLower :: (PandocMonad m) => LP m Inlines +romanNumeralLower = +  str . T.toLower . toRomanNumeral <$> romanNumeralArg + +romanNumeralArg :: (PandocMonad m) => LP m Int +romanNumeralArg = spaces *> (parser <|> inBraces) +  where +    inBraces = do +      symbol '{' +      spaces +      res <- parser +      spaces +      symbol '}' +      return res +    parser = do +      s <- untokenize <$> many1 (satisfyTok isWordTok) +      let (digits, rest) = T.span isDigit s +      unless (T.null rest) $ +        Prelude.fail "Non-digits in argument to \\Rn or \\RN" +      safeRead digits + +accentWith :: PandocMonad m +           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines +accentWith tok combiningAccent fallBack = try $ do +  ils <- tok +  case toList ils of +       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ +         -- try to normalize to the combined character: +         Str (Normalize.normalize Normalize.NFC +               (T.pack [x, combiningAccent]) <> xs) : ys +       [Space] -> return $ str $ T.singleton +                         $ fromMaybe combiningAccent fallBack +       []      -> return $ str $ T.singleton +                         $ fromMaybe combiningAccent fallBack +       _       -> return ils + + +verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) +verbCommands = M.fromList +  [ ("verb", doverb) +  , ("lstinline", dolstinline) +  , ("mintinline", domintinline) +  , ("Verb", doverb) +  ] + +accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +accentCommands tok = +  let accent = accentWith tok +  in  M.fromList +  [ ("aa", lit "å") +  , ("AA", lit "Å") +  , ("ss", lit "ß") +  , ("o", lit "ø") +  , ("O", lit "Ø") +  , ("L", lit "Ł") +  , ("l", lit "ł") +  , ("ae", lit "æ") +  , ("AE", lit "Æ") +  , ("oe", lit "œ") +  , ("OE", lit "Œ") +  , ("pounds", lit "£") +  , ("euro", lit "€") +  , ("copyright", lit "©") +  , ("textasciicircum", lit "^") +  , ("textasciitilde", lit "~") +  , ("H", accent '\779' Nothing) -- hungarumlaut +  , ("`", accent '\768' (Just '`')) -- grave +  , ("'", accent '\769' (Just '\'')) -- acute +  , ("^", accent '\770' (Just '^')) -- circ +  , ("~", accent '\771' (Just '~')) -- tilde +  , ("\"", accent '\776' Nothing) -- umlaut +  , (".", accent '\775' Nothing) -- dot +  , ("=", accent '\772' Nothing) -- macron +  , ("|", accent '\781' Nothing) -- vertical line above +  , ("b", accent '\817' Nothing) -- macron below +  , ("c", accent '\807' Nothing) -- cedilla +  , ("G", accent '\783' Nothing) -- doublegrave +  , ("h", accent '\777' Nothing) -- hookabove +  , ("d", accent '\803' Nothing) -- dotbelow +  , ("f", accent '\785' Nothing)  -- inverted breve +  , ("r", accent '\778' Nothing)  -- ringabove +  , ("t", accent '\865' Nothing)  -- double inverted breve +  , ("U", accent '\782' Nothing)  -- double vertical line above +  , ("v", accent '\780' Nothing) -- hacek +  , ("u", accent '\774' Nothing) -- breve +  , ("k", accent '\808' Nothing) -- ogonek +  , ("textogonekcentered", accent '\808' Nothing) -- ogonek +  , ("i", lit "ı")  -- dotless i +  , ("j", lit "ȷ")  -- dotless j +  , ("newtie", accent '\785' Nothing) -- inverted breve +  , ("textcircled", accent '\8413' Nothing) -- combining circle +  ] + +charCommands :: PandocMonad m => M.Map Text (LP m Inlines) +charCommands = M.fromList +  [ ("ldots", lit "…") +  , ("vdots", lit "\8942") +  , ("dots", lit "…") +  , ("mdots", lit "…") +  , ("sim", lit "~") +  , ("sep", lit ",") +  , ("P", lit "¶") +  , ("S", lit "§") +  , ("$", lit "$") +  , ("%", lit "%") +  , ("&", lit "&") +  , ("#", lit "#") +  , ("_", lit "_") +  , ("{", lit "{") +  , ("}", lit "}") +  , ("qed", lit "\a0\x25FB") +  , ("lq", return (str "‘")) +  , ("rq", return (str "’")) +  , ("textquoteleft", return (str "‘")) +  , ("textquoteright", return (str "’")) +  , ("textquotedblleft", return (str "“")) +  , ("textquotedblright", return (str "”")) +  , ("/", pure mempty) -- italic correction +  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState +                            guard $ not inTableCell +                            optional rawopt +                            spaces)) +  , (",", lit "\8198") +  , ("@", pure mempty) +  , (" ", lit "\160") +  , ("ps", pure $ str "PS." <> space) +  , ("TeX", lit "TeX") +  , ("LaTeX", lit "LaTeX") +  , ("bar", lit "|") +  , ("textless", lit "<") +  , ("textgreater", lit ">") +  , ("textbackslash", lit "\\") +  , ("backslash", lit "\\") +  , ("slash", lit "/") +  -- fontawesome +  , ("faCheck", lit "\10003") +  , ("faClose", lit "\10007") +  -- hyphenat +  , ("bshyp", lit "\\\173") +  , ("fshyp", lit "/\173") +  , ("dothyp", lit ".\173") +  , ("colonhyp", lit ":\173") +  , ("hyp", lit "-") +  ] + +biblatexInlineCommands :: PandocMonad m +                       => LP m Inlines -> M.Map Text (LP m Inlines) +biblatexInlineCommands tok = M.fromList +  -- biblatex misc +  [ ("RN", romanNumeralUpper) +  , ("Rn", romanNumeralLower) +  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) +  , ("mkbibemph", spanWith nullAttr . emph <$> tok) +  , ("mkbibitalic", spanWith nullAttr . emph <$> tok) +  , ("mkbibbold", spanWith nullAttr . strong <$> tok) +  , ("mkbibparens", +       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) +  , ("mkbibbrackets", +       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) +  , ("autocap", spanWith nullAttr <$> tok) +  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) +  , ("bibstring", +       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize +         <$> braced) +  , ("adddot", pure (str ".")) +  , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) +  , ("addabbrvspace", pure space) +  , ("hyphen", pure (str "-")) +  ] + +nameCommands :: PandocMonad m => M.Map Text (LP m Inlines) +nameCommands = M.fromList +  [ ("figurename", doTerm Translations.Figure) +  , ("prefacename", doTerm Translations.Preface) +  , ("refname", doTerm Translations.References) +  , ("bibname", doTerm Translations.Bibliography) +  , ("chaptername", doTerm Translations.Chapter) +  , ("partname", doTerm Translations.Part) +  , ("contentsname", doTerm Translations.Contents) +  , ("listfigurename", doTerm Translations.ListOfFigures) +  , ("listtablename", doTerm Translations.ListOfTables) +  , ("indexname", doTerm Translations.Index) +  , ("abstractname", doTerm Translations.Abstract) +  , ("tablename", doTerm Translations.Table) +  , ("enclname", doTerm Translations.Encl) +  , ("ccname", doTerm Translations.Cc) +  , ("headtoname", doTerm Translations.To) +  , ("pagename", doTerm Translations.Page) +  , ("seename", doTerm Translations.See) +  , ("seealsoname", doTerm Translations.SeeAlso) +  , ("proofname", doTerm Translations.Proof) +  , ("glossaryname", doTerm Translations.Glossary) +  , ("lstlistingname", doTerm Translations.Listing) +  ] + +refCommands :: PandocMonad m => M.Map Text (LP m Inlines) +refCommands = M.fromList +  [ ("label", rawInlineOr "label" dolabel) +  , ("ref", rawInlineOr "ref" $ doref "ref") +  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty +  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty +  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty +  ] + +acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines) +acronymCommands = M.fromList +  -- glossaries package +  [ ("gls", doAcronym "short") +  , ("Gls", doAcronym "short") +  , ("glsdesc", doAcronym "long") +  , ("Glsdesc", doAcronym "long") +  , ("GLSdesc", doAcronym "long") +  , ("acrlong", doAcronym "long") +  , ("Acrlong", doAcronym "long") +  , ("acrfull", doAcronym "full") +  , ("Acrfull", doAcronym "full") +  , ("acrshort", doAcronym "abbrv") +  , ("Acrshort", doAcronym "abbrv") +  , ("glspl", doAcronymPlural "short") +  , ("Glspl", doAcronymPlural "short") +  , ("glsdescplural", doAcronymPlural "long") +  , ("Glsdescplural", doAcronymPlural "long") +  , ("GLSdescplural", doAcronymPlural "long") +  -- acronyms package +  , ("ac", doAcronym "short") +  , ("acf", doAcronym "full") +  , ("acs", doAcronym "abbrv") +  , ("acl", doAcronym "long") +  , ("acp", doAcronymPlural "short") +  , ("acfp", doAcronymPlural "full") +  , ("acsp", doAcronymPlural "abbrv") +  , ("aclp", doAcronymPlural "long") +  , ("Ac", doAcronym "short") +  , ("Acf", doAcronym "full") +  , ("Acs", doAcronym "abbrv") +  , ("Acl", doAcronym "long") +  , ("Acp", doAcronymPlural "short") +  , ("Acfp", doAcronymPlural "full") +  , ("Acsp", doAcronymPlural "abbrv") +  , ("Aclp", doAcronymPlural "long") +  ] + +doAcronym :: PandocMonad m => Text -> LP m Inlines +doAcronym form = do +  acro <- braced +  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), +    ("acronym-form", "singular+" <> form)]) +    $ str $ untokenize acro] + +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines +doAcronymPlural form = do +  acro <- braced +  let plural = str "s" +  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), +    ("acronym-form", "plural+" <> form)]) $ +   mconcat [str $ untokenize acro, plural]] + + diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 814b2fe79..6a8327904 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,7 +1,7 @@  {-# LANGUAGE OverloadedStrings #-}  {- |     Module      : Text.Pandoc.Readers.LaTeX.Lang -   Copyright   : Copyright (C) 2018-2020 John MacFarlane +   Copyright   : Copyright (C) 2018-2021 John MacFarlane     License     : GNU GPL, version 2 or above     Maintainer  : John MacFarlane <jgm@berkeley.edu> @@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to  BCP47 'Lang'.  -}  module Text.Pandoc.Readers.LaTeX.Lang -  ( polyglossiaLangToBCP47 +  ( setDefaultLanguage +  , polyglossiaLangToBCP47    , babelLangToBCP47 +  , enquoteCommands +  , inlineLanguageCommands    )  where  import qualified Data.Map as M +import Data.Text (Text)  import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang(..)) +import Text.Pandoc.Shared (extractSpaces) +import Text.Collate.Lang (Lang(..), renderLang) +import Text.Pandoc.Class (PandocMonad(..), setTranslations) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), +                            withQuoteContext) +import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith, +                            singleQuoted, doubleQuoted) + +enquote :: PandocMonad m +        => LP m Inlines +        -> Bool -> Maybe Text -> LP m Inlines +enquote tok starred mblang = do +  skipopts +  let lang = mblang >>= babelLangToBCP47 +  let langspan = case lang of +                      Nothing -> id +                      Just l  -> spanWith ("",[],[("lang", renderLang l)]) +  quoteContext <- sQuoteContext <$> getState +  if starred || quoteContext == InDoubleQuote +     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok +     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok + +enquoteCommands :: PandocMonad m +                => LP m Inlines -> M.Map Text (LP m Inlines) +enquoteCommands tok = M.fromList +  [ ("enquote*", enquote tok True Nothing) +  , ("enquote", enquote tok False Nothing) +  -- foreignquote is supposed to use native quote marks +  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize) +  , ("foreignquote", braced >>= enquote tok False . Just . untokenize) +  -- hypehnquote uses regular quotes +  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize) +  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize) +  ] + +foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines +foreignlanguage tok = do +  babelLang <- untokenize <$> braced +  case babelLangToBCP47 babelLang of +       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok +       _ -> tok + +inlineLanguageCommands :: PandocMonad m +                       => LP m Inlines -> M.Map Text (LP m Inlines) +inlineLanguageCommands tok = +  M.fromList $ +    ("foreignlanguage", foreignlanguage tok) : +    (mk <$> M.toList polyglossiaLangToBCP47) +  where +    mk (polyglossia, bcp47Func) = +      ("text" <> polyglossia, inlineLanguage tok bcp47Func) + +inlineLanguage :: PandocMonad m +               => LP m Inlines -> (Text -> Lang) -> LP m Inlines +inlineLanguage tok bcp47Func = do +  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') +                <$> rawopt +  let lang = renderLang $ bcp47Func o +  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok + +setDefaultLanguage :: PandocMonad m => LP m Blocks +setDefaultLanguage = do +  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') +                <$> rawopt +  polylang <- untokenize <$> braced +  case M.lookup polylang polyglossiaLangToBCP47 of +       Nothing -> return mempty -- TODO mzero? warning? +       Just langFunc -> do +         let l = langFunc o +         setTranslations l +         updateState $ setMeta "lang" $ str (renderLang l) +         return mempty  polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)  polyglossiaLangToBCP47 = M.fromList    [ ("arabic", \o -> case T.filter (/=' ') o of -       "locale=algeria"    -> Lang "ar" "" "DZ" [] -       "locale=mashriq"    -> Lang "ar" "" "SY" [] -       "locale=libya"      -> Lang "ar" "" "LY" [] -       "locale=morocco"    -> Lang "ar" "" "MA" [] -       "locale=mauritania" -> Lang "ar" "" "MR" [] -       "locale=tunisia"    -> Lang "ar" "" "TN" [] -       _                   -> Lang "ar" "" "" []) +       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] [] +       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] [] +       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] [] +       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] [] +       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] [] +       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] [] +       _                   -> Lang "ar" Nothing (Just "")   [] [] [])    , ("german", \o -> case T.filter (/=' ') o of -       "spelling=old" -> Lang "de" "" "DE" ["1901"] +       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []         "variant=austrian,spelling=old" -                       -> Lang "de" "" "AT" ["1901"] -       "variant=austrian" -> Lang "de" "" "AT" [] +                       -> Lang "de" Nothing (Just "AT") ["1901"] [] [] +       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []         "variant=swiss,spelling=old" -                       -> Lang "de" "" "CH" ["1901"] -       "variant=swiss" -> Lang "de" "" "CH" [] -       _ -> Lang "de" "" "" []) -  , ("lsorbian", \_ -> Lang "dsb" "" "" []) +                       -> Lang "de" Nothing (Just "CH") ["1901"] [] [] +       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] [] +       _ -> Lang "de" Nothing Nothing [] [] []) +  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])    , ("greek", \o -> case T.filter (/=' ') o of -       "variant=poly"    -> Lang "el" "" "polyton" [] -       "variant=ancient" -> Lang "grc" "" "" [] -       _                 -> Lang "el" "" "" []) +       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] [] +       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] [] +       _                 -> Lang "el" Nothing Nothing [] [] [])    , ("english", \o -> case T.filter (/=' ') o of -       "variant=australian" -> Lang "en" "" "AU" [] -       "variant=canadian"   -> Lang "en" "" "CA" [] -       "variant=british"    -> Lang "en" "" "GB" [] -       "variant=newzealand" -> Lang "en" "" "NZ" [] -       "variant=american"   -> Lang "en" "" "US" [] -       _                    -> Lang "en" "" "" []) -  , ("usorbian", \_ -> Lang "hsb" "" "" []) +       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] [] +       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] [] +       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] [] +       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] [] +       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] [] +       _                    -> Lang "en" Nothing (Just "")   [] [] []) +  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])    , ("latin", \o -> case T.filter (/=' ') o of -       "variant=classic" -> Lang "la" "" "" ["x-classic"] -       _                 -> Lang "la" "" "" []) -  , ("slovenian", \_ -> Lang "sl" "" "" []) -  , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) -  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) -  , ("afrikaans", \_ -> Lang "af" "" "" []) -  , ("amharic", \_ -> Lang "am" "" "" []) -  , ("assamese", \_ -> Lang "as" "" "" []) -  , ("asturian", \_ -> Lang "ast" "" "" []) -  , ("bulgarian", \_ -> Lang "bg" "" "" []) -  , ("bengali", \_ -> Lang "bn" "" "" []) -  , ("tibetan", \_ -> Lang "bo" "" "" []) -  , ("breton", \_ -> Lang "br" "" "" []) -  , ("catalan", \_ -> Lang "ca" "" "" []) -  , ("welsh", \_ -> Lang "cy" "" "" []) -  , ("czech", \_ -> Lang "cs" "" "" []) -  , ("coptic", \_ -> Lang "cop" "" "" []) -  , ("danish", \_ -> Lang "da" "" "" []) -  , ("divehi", \_ -> Lang "dv" "" "" []) -  , ("esperanto", \_ -> Lang "eo" "" "" []) -  , ("spanish", \_ -> Lang "es" "" "" []) -  , ("estonian", \_ -> Lang "et" "" "" []) -  , ("basque", \_ -> Lang "eu" "" "" []) -  , ("farsi", \_ -> Lang "fa" "" "" []) -  , ("finnish", \_ -> Lang "fi" "" "" []) -  , ("french", \_ -> Lang "fr" "" "" []) -  , ("friulan", \_ -> Lang "fur" "" "" []) -  , ("irish", \_ -> Lang "ga" "" "" []) -  , ("scottish", \_ -> Lang "gd" "" "" []) -  , ("ethiopic", \_ -> Lang "gez" "" "" []) -  , ("galician", \_ -> Lang "gl" "" "" []) -  , ("hebrew", \_ -> Lang "he" "" "" []) -  , ("hindi", \_ -> Lang "hi" "" "" []) -  , ("croatian", \_ -> Lang "hr" "" "" []) -  , ("magyar", \_ -> Lang "hu" "" "" []) -  , ("armenian", \_ -> Lang "hy" "" "" []) -  , ("interlingua", \_ -> Lang "ia" "" "" []) -  , ("indonesian", \_ -> Lang "id" "" "" []) -  , ("icelandic", \_ -> Lang "is" "" "" []) -  , ("italian", \_ -> Lang "it" "" "" []) -  , ("japanese", \_ -> Lang "jp" "" "" []) -  , ("khmer", \_ -> Lang "km" "" "" []) -  , ("kurmanji", \_ -> Lang "kmr" "" "" []) -  , ("kannada", \_ -> Lang "kn" "" "" []) -  , ("korean", \_ -> Lang "ko" "" "" []) -  , ("lao", \_ -> Lang "lo" "" "" []) -  , ("lithuanian", \_ -> Lang "lt" "" "" []) -  , ("latvian", \_ -> Lang "lv" "" "" []) -  , ("malayalam", \_ -> Lang "ml" "" "" []) -  , ("mongolian", \_ -> Lang "mn" "" "" []) -  , ("marathi", \_ -> Lang "mr" "" "" []) -  , ("dutch", \_ -> Lang "nl" "" "" []) -  , ("nynorsk", \_ -> Lang "nn" "" "" []) -  , ("norsk", \_ -> Lang "no" "" "" []) -  , ("nko", \_ -> Lang "nqo" "" "" []) -  , ("occitan", \_ -> Lang "oc" "" "" []) -  , ("panjabi", \_ -> Lang "pa" "" "" []) -  , ("polish", \_ -> Lang "pl" "" "" []) -  , ("piedmontese", \_ -> Lang "pms" "" "" []) -  , ("portuguese", \_ -> Lang "pt" "" "" []) -  , ("romansh", \_ -> Lang "rm" "" "" []) -  , ("romanian", \_ -> Lang "ro" "" "" []) -  , ("russian", \_ -> Lang "ru" "" "" []) -  , ("sanskrit", \_ -> Lang "sa" "" "" []) -  , ("samin", \_ -> Lang "se" "" "" []) -  , ("slovak", \_ -> Lang "sk" "" "" []) -  , ("albanian", \_ -> Lang "sq" "" "" []) -  , ("serbian", \_ -> Lang "sr" "" "" []) -  , ("swedish", \_ -> Lang "sv" "" "" []) -  , ("syriac", \_ -> Lang "syr" "" "" []) -  , ("tamil", \_ -> Lang "ta" "" "" []) -  , ("telugu", \_ -> Lang "te" "" "" []) -  , ("thai", \_ -> Lang "th" "" "" []) -  , ("turkmen", \_ -> Lang "tk" "" "" []) -  , ("turkish", \_ -> Lang "tr" "" "" []) -  , ("ukrainian", \_ -> Lang "uk" "" "" []) -  , ("urdu", \_ -> Lang "ur" "" "" []) -  , ("vietnamese", \_ -> Lang "vi" "" "" []) +       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] [] +       _                 -> Lang "la" Nothing Nothing [] [] []) +  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] []) +  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] []) +  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] []) +  , ("afrikaans", \_ -> simpleLang "af") +  , ("amharic", \_ -> simpleLang "am") +  , ("assamese", \_ -> simpleLang "as") +  , ("asturian", \_ -> simpleLang "ast") +  , ("bulgarian", \_ -> simpleLang "bg") +  , ("bengali", \_ -> simpleLang "bn") +  , ("tibetan", \_ -> simpleLang "bo") +  , ("breton", \_ -> simpleLang "br") +  , ("catalan", \_ -> simpleLang "ca") +  , ("welsh", \_ -> simpleLang "cy") +  , ("czech", \_ -> simpleLang "cs") +  , ("coptic", \_ -> simpleLang "cop") +  , ("danish", \_ -> simpleLang "da") +  , ("divehi", \_ -> simpleLang "dv") +  , ("esperanto", \_ -> simpleLang "eo") +  , ("spanish", \_ -> simpleLang "es") +  , ("estonian", \_ -> simpleLang "et") +  , ("basque", \_ -> simpleLang "eu") +  , ("farsi", \_ -> simpleLang "fa") +  , ("finnish", \_ -> simpleLang "fi") +  , ("french", \_ -> simpleLang "fr") +  , ("friulan", \_ -> simpleLang "fur") +  , ("irish", \_ -> simpleLang "ga") +  , ("scottish", \_ -> simpleLang "gd") +  , ("ethiopic", \_ -> simpleLang "gez") +  , ("galician", \_ -> simpleLang "gl") +  , ("hebrew", \_ -> simpleLang "he") +  , ("hindi", \_ -> simpleLang "hi") +  , ("croatian", \_ -> simpleLang "hr") +  , ("magyar", \_ -> simpleLang "hu") +  , ("armenian", \_ -> simpleLang "hy") +  , ("interlingua", \_ -> simpleLang "ia") +  , ("indonesian", \_ -> simpleLang "id") +  , ("icelandic", \_ -> simpleLang "is") +  , ("italian", \_ -> simpleLang "it") +  , ("japanese", \_ -> simpleLang "jp") +  , ("khmer", \_ -> simpleLang "km") +  , ("kurmanji", \_ -> simpleLang "kmr") +  , ("kannada", \_ -> simpleLang "kn") +  , ("korean", \_ -> simpleLang "ko") +  , ("lao", \_ -> simpleLang "lo") +  , ("lithuanian", \_ -> simpleLang "lt") +  , ("latvian", \_ -> simpleLang "lv") +  , ("malayalam", \_ -> simpleLang "ml") +  , ("mongolian", \_ -> simpleLang "mn") +  , ("marathi", \_ -> simpleLang "mr") +  , ("dutch", \_ -> simpleLang "nl") +  , ("nynorsk", \_ -> simpleLang "nn") +  , ("norsk", \_ -> simpleLang "no") +  , ("nko", \_ -> simpleLang "nqo") +  , ("occitan", \_ -> simpleLang "oc") +  , ("panjabi", \_ -> simpleLang "pa") +  , ("polish", \_ -> simpleLang "pl") +  , ("piedmontese", \_ -> simpleLang "pms") +  , ("portuguese", \_ -> simpleLang "pt") +  , ("romansh", \_ -> simpleLang "rm") +  , ("romanian", \_ -> simpleLang "ro") +  , ("russian", \_ -> simpleLang "ru") +  , ("sanskrit", \_ -> simpleLang "sa") +  , ("samin", \_ -> simpleLang "se") +  , ("slovak", \_ -> simpleLang "sk") +  , ("albanian", \_ -> simpleLang "sq") +  , ("serbian", \_ -> simpleLang "sr") +  , ("swedish", \_ -> simpleLang "sv") +  , ("syriac", \_ -> simpleLang "syr") +  , ("tamil", \_ -> simpleLang "ta") +  , ("telugu", \_ -> simpleLang "te") +  , ("thai", \_ -> simpleLang "th") +  , ("turkmen", \_ -> simpleLang "tk") +  , ("turkish", \_ -> simpleLang "tr") +  , ("ukrainian", \_ -> simpleLang "uk") +  , ("urdu", \_ -> simpleLang "ur") +  , ("vietnamese", \_ -> simpleLang "vi")    ] +simpleLang :: Text -> Lang +simpleLang l = Lang l Nothing Nothing [] [] [] +  babelLangToBCP47 :: T.Text -> Maybe Lang  babelLangToBCP47 s =    case s of -       "austrian" -> Just $ Lang "de" "" "AT" ["1901"] -       "naustrian" -> Just $ Lang "de" "" "AT" [] -       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] -       "nswissgerman" -> Just $ Lang "de" "" "CH" [] -       "german" -> Just $ Lang "de" "" "DE" ["1901"] -       "ngerman" -> Just $ Lang "de" "" "DE" [] -       "lowersorbian" -> Just $ Lang "dsb" "" "" [] -       "uppersorbian" -> Just $ Lang "hsb" "" "" [] -       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] -       "slovene" -> Just $ Lang "sl" "" "" [] -       "australian" -> Just $ Lang "en" "" "AU" [] -       "canadian" -> Just $ Lang "en" "" "CA" [] -       "british" -> Just $ Lang "en" "" "GB" [] -       "newzealand" -> Just $ Lang "en" "" "NZ" [] -       "american" -> Just $ Lang "en" "" "US" [] -       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] +       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] [] +       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] [] +       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] [] +       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] [] +       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] [] +       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] [] +       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] [] +       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] [] +       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] [] +       "slovene" -> Just $ simpleLang "sl" +       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] [] +       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] [] +       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] [] +       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] [] +       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] [] +       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []         _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs new file mode 100644 index 000000000..5495a8e74 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Macro +  ( macroDef +  ) +where +import Text.Pandoc.Extensions (Extension(..)) +import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined)) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, +                            optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T + +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a +macroDef constructor = do +    (_, s) <- withRaw (commandDef <|> environmentDef) +    (constructor (untokenize s) <$ +      guardDisabled Ext_latex_macros) +     <|> return mempty +  where commandDef = do +          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif +          guardDisabled Ext_latex_macros <|> +           mapM_ (\(name, macro') -> +                   updateState (\s -> s{ sMacros = M.insert name macro' +                                          (sMacros s) })) nameMacroPairs +        environmentDef = do +          mbenv <- newenvironment +          case mbenv of +            Nothing -> return () +            Just (name, macro1, macro2) -> +              guardDisabled Ext_latex_macros <|> +                do updateState $ \s -> s{ sMacros = +                    M.insert name macro1 (sMacros s) } +                   updateState $ \s -> s{ sMacros = +                    M.insert ("end" <> name) macro2 (sMacros s) } +        -- @\newenvironment{envname}[n-args][default]{begin}{end}@ +        -- is equivalent to +        -- @\newcommand{\envname}[n-args][default]{begin}@ +        -- @\newcommand{\endenvname}@ + +letmacro :: PandocMonad m => LP m [(Text, Macro)] +letmacro = do +  controlSeq "let" +  (name, contents) <- withVerbatimMode $ do +    Tok _ (CtrlSeq name) _ <- anyControlSeq +    optional $ symbol '=' +    spaces +    -- we first parse in verbatim mode, and then expand macros, +    -- because we don't want \let\foo\bar to turn into +    -- \let\foo hello if we have previously \def\bar{hello} +    contents <- bracedOrToken +    return (name, contents) +  contents' <- doMacros' 0 contents +  return [(name, Macro ExpandWhenDefined [] Nothing contents')] + +defmacro :: PandocMonad m => LP m [(Text, Macro)] +defmacro = do +  -- we use withVerbatimMode, because macros are to be expanded +  -- at point of use, not point of definition +  controlSeq "def" +  withVerbatimMode $ do +    Tok _ (CtrlSeq name) _ <- anyControlSeq +    argspecs <- many (argspecArg <|> argspecPattern) +    contents <- bracedOrToken +    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)] + +-- \newif\iffoo' defines: +-- \iffoo to be \iffalse +-- \footrue to be a command that defines \iffoo to be \iftrue +-- \foofalse to be a command that defines \iffoo to be \iffalse +newif :: PandocMonad m => LP m [(Text, Macro)] +newif = do +  controlSeq "newif" +  withVerbatimMode $ do +    Tok pos (CtrlSeq name) _ <- anyControlSeq +    -- \def\iffoo\iffalse +    -- \def\footrue{\def\iffoo\iftrue} +    -- \def\foofalse{\def\iffoo\iffalse} +    let base = T.drop 2 name +    return [ (name, Macro ExpandWhenUsed [] Nothing +                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"]) +           , (base <> "true", +                   Macro ExpandWhenUsed [] Nothing +                   [ Tok pos (CtrlSeq "def") "\\def" +                   , Tok pos (CtrlSeq name) ("\\" <> name) +                   , Tok pos (CtrlSeq "iftrue") "\\iftrue" +                   ]) +           , (base <> "false", +                   Macro ExpandWhenUsed [] Nothing +                   [ Tok pos (CtrlSeq "def") "\\def" +                   , Tok pos (CtrlSeq name) ("\\" <> name) +                   , Tok pos (CtrlSeq "iffalse") "\\iffalse" +                   ]) +           ] + +argspecArg :: PandocMonad m => LP m ArgSpec +argspecArg = do +  Tok _ (Arg i) _ <- satisfyTok isArgTok +  return $ ArgNum i + +argspecPattern :: PandocMonad m => LP m ArgSpec +argspecPattern = +  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> +                              (toktype' == Symbol || toktype' == Word) && +                              (txt /= "{" && txt /= "\\" && txt /= "}"))) + +newcommand :: PandocMonad m => LP m [(Text, Macro)] +newcommand = do +  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> +                             controlSeq "renewcommand" <|> +                             controlSeq "providecommand" <|> +                             controlSeq "DeclareMathOperator" <|> +                             controlSeq "DeclareRobustCommand" +  withVerbatimMode $ do +    Tok _ (CtrlSeq name) txt <- do +      optional (symbol '*') +      anyControlSeq <|> +        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') +    spaces +    numargs <- option 0 $ try bracketedNum +    let argspecs = map ArgNum [1..numargs] +    spaces +    optarg <- option Nothing $ Just <$> try bracketedToks +    spaces +    contents' <- bracedOrToken +    let contents = +         case mtype of +              "DeclareMathOperator" -> +                 Tok pos (CtrlSeq "mathop") "\\mathop" +                 : Tok pos Symbol "{" +                 : Tok pos (CtrlSeq "mathrm") "\\mathrm" +                 : Tok pos Symbol "{" +                 : (contents' ++ +                   [ Tok pos Symbol "}", Tok pos Symbol "}" ]) +              _                     -> contents' +    macros <- sMacros <$> getState +    case M.lookup name macros of +        Just macro +          | mtype == "newcommand" -> do +              report $ MacroAlreadyDefined txt pos +              return [(name, macro)] +          | mtype == "providecommand" -> return [(name, macro)] +        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)] + +newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) +newenvironment = do +  pos <- getPosition +  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> +                             controlSeq "renewenvironment" <|> +                             controlSeq "provideenvironment" +  withVerbatimMode $ do +    optional $ symbol '*' +    spaces +    name <- untokenize <$> braced +    spaces +    numargs <- option 0 $ try bracketedNum +    spaces +    optarg <- option Nothing $ Just <$> try bracketedToks +    let argspecs = map (\i -> ArgNum i) [1..numargs] +    startcontents <- spaces >> bracedOrToken +    endcontents <- spaces >> bracedOrToken +    macros <- sMacros <$> getState +    case M.lookup name macros of +         Just _ +           | mtype == "newenvironment" -> do +               report $ MacroAlreadyDefined name pos +               return Nothing +           | mtype == "provideenvironment" -> +               return Nothing +         _ -> return $ Just (name, +                      Macro ExpandWhenUsed argspecs optarg startcontents, +                      Macro ExpandWhenUsed [] Nothing endcontents) + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do +  ds <- untokenize <$> bracketedToks +  case safeRead ds of +       Just i -> return i +       _      -> return 0 diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs new file mode 100644 index 000000000..5b49a0376 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs @@ -0,0 +1,221 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Math +  ( dollarsMath +  , inlineEnvironments +  , inlineEnvironment +  , mathInline +  , mathDisplay +  , theoremstyle +  , theoremEnvironment +  , newtheorem +  , proof +  ) +where +import Data.Maybe (fromMaybe) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Builder as B +import qualified Data.Sequence as Seq +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (trimMath, stripTrailingNewlines) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, +                            optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import Control.Monad (guard, mzero) +import qualified Data.Map as M +import Data.Text (Text) + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do +  symbol '$' +  display <- option False (True <$ symbol '$') +  (do contents <- try $ untokenize <$> pDollarsMath 0 +      if display +         then mathDisplay contents <$ symbol '$' +         else return $ mathInline contents) +   <|> (guard display >> return (mathInline "")) + +-- Int is number of embedded groupings +pDollarsMath :: PandocMonad m => Int -> LP m [Tok] +pDollarsMath n = do +  tk@(Tok _ toktype t) <- anyTok +  case toktype of +       Symbol | t == "$" +              , n == 0 -> return [] +              | t == "\\" -> do +                  tk' <- anyTok +                  (tk :) . (tk' :) <$> pDollarsMath n +              | t == "{" -> (tk :) <$> pDollarsMath (n+1) +              | t == "}" -> +                if n > 0 +                then (tk :) <$> pDollarsMath (n-1) +                else mzero +       _ -> (tk :) <$> pDollarsMath n + +mathDisplay :: Text -> Inlines +mathDisplay = displayMath . trimMath + +mathInline :: Text -> Inlines +mathInline = math . trimMath + +mathEnvWith :: PandocMonad m +            => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name +   where inner x = case innerEnv of +                        Nothing -> x +                        Just y  -> "\\begin{" <> y <> "}\n" <> x <> +                                   "\\end{" <> y <> "}" + +mathEnv :: PandocMonad m => Text -> LP m Text +mathEnv name = do +  skipopts +  optional blankline +  res <- manyTill anyTok (end_ name) +  return $ stripTrailingNewlines $ untokenize res + +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do +  controlSeq "begin" +  name <- untokenize <$> braced +  M.findWithDefault mzero name inlineEnvironments + +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ +    ("displaymath", mathEnvWith id Nothing "displaymath") +  , ("math", math <$> mathEnv "math") +  , ("equation", mathEnvWith id Nothing "equation") +  , ("equation*", mathEnvWith id Nothing "equation*") +  , ("gather", mathEnvWith id (Just "gathered") "gather") +  , ("gather*", mathEnvWith id (Just "gathered") "gather*") +  , ("multline", mathEnvWith id (Just "gathered") "multline") +  , ("multline*", mathEnvWith id (Just "gathered") "multline*") +  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") +  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") +  , ("align", mathEnvWith id (Just "aligned") "align") +  , ("align*", mathEnvWith id (Just "aligned") "align*") +  , ("alignat", mathEnvWith id (Just "aligned") "alignat") +  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") +  , ("dmath", mathEnvWith id Nothing "dmath") +  , ("dmath*", mathEnvWith id Nothing "dmath*") +  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") +  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") +  , ("darray", mathEnvWith id (Just "aligned") "darray") +  , ("darray*", mathEnvWith id (Just "aligned") "darray*") +  ] + +theoremstyle :: PandocMonad m => LP m Blocks +theoremstyle = do +  stylename <- untokenize <$> braced +  let mbstyle = case stylename of +                  "plain"      -> Just PlainStyle +                  "definition" -> Just DefinitionStyle +                  "remark"     -> Just RemarkStyle +                  _            -> Nothing +  case mbstyle of +    Nothing  -> return () +    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty } +  return mempty + +newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks +newtheorem inline = do +  number <- option True (False <$ symbol '*' <* sp) +  name <- untokenize <$> braced +  sp +  series <- option Nothing $ Just . untokenize <$> bracketedToks +  sp +  showName <- tokWith inline +  sp +  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks +  sty <- sLastTheoremStyle <$> getState +  let spec = TheoremSpec { theoremName = showName +                         , theoremStyle = sty +                         , theoremSeries = series +                         , theoremSyncTo = syncTo +                         , theoremNumber = number +                         , theoremLastNum = DottedNum [0] } +  tmap <- sTheoremMap <$> getState +  updateState $ \s -> s{ sTheoremMap = +                            M.insert name spec tmap } +  return mempty + +theoremEnvironment :: PandocMonad m +                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks +theoremEnvironment blocks opt name = do +  tmap <- sTheoremMap <$> getState +  case M.lookup name tmap of +    Nothing -> mzero +    Just tspec -> do +       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt +       mblabel <- option Nothing $ Just . untokenize <$> +                   try (spaces >> controlSeq "label" >> spaces >> braced) +       bs <- env name blocks +       number <- +         if theoremNumber tspec +            then do +               let name' = fromMaybe name $ theoremSeries tspec +               num <- getNextNumber +                   (maybe (DottedNum [0]) theoremLastNum . +                    M.lookup name' . sTheoremMap) +               updateState $ \s -> +                 s{ sTheoremMap = +                       M.adjust +                       (\spec -> spec{ theoremLastNum = num }) +                       name' +                       (sTheoremMap s) +                  } + +               case mblabel of +                 Just ident -> +                   updateState $ \s -> +                     s{ sLabels = M.insert ident +                         (B.toList $ +                           theoremName tspec <> "\160" <> +                           str (renderDottedNum num)) (sLabels s) } +                 Nothing -> return () +               return $ space <> B.text (renderDottedNum num) +            else return mempty +       let titleEmph = case theoremStyle tspec of +                         PlainStyle      -> B.strong +                         DefinitionStyle -> B.strong +                         RemarkStyle     -> B.emph +       let title = titleEmph (theoremName tspec <> number) +                      <> optTitle <> "." <> space +       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title +              $ case theoremStyle tspec of +                  PlainStyle -> walk italicize bs +                  _          -> bs + + + +proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks +proof blocks opt = do +  title <- option (B.text "Proof") opt +  bs <- env "proof" blocks +  return $ +    B.divWith ("", ["proof"], []) $ +      addQed $ addTitle (B.emph (title <> ".")) bs + +addTitle :: Inlines -> Blocks -> Blocks +addTitle ils bs = +  case B.toList bs of +    (Para xs : rest) +      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest) +    _ -> B.para ils <> bs + +addQed :: Blocks -> Blocks +addQed bs = +  case Seq.viewr (B.unMany bs) of +    s Seq.:> Para ils +      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign)) +    _ -> bs <> B.para qedSign + where +  qedSign = B.str "\xa0\x25FB" + +italicize :: Block -> Block +italicize x@(Para [Image{}]) = x -- see #6925 +italicize (Para ils) = Para [Emph ils] +italicize (Plain ils) = Plain [Emph ils] +italicize x = x + + diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 563d32883..9dac4d6ef 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -1,11 +1,12 @@  {-# LANGUAGE FlexibleInstances     #-} +{-# LANGUAGE FlexibleContexts      #-}  {-# LANGUAGE LambdaCase            #-}  {-# LANGUAGE MultiParamTypeClasses #-}  {-# LANGUAGE OverloadedStrings     #-}  {-# LANGUAGE ScopedTypeVariables   #-}  {- |     Module      : Text.Pandoc.Readers.LaTeX.Parsing -   Copyright   : Copyright (C) 2006-2020 John MacFarlane +   Copyright   : Copyright (C) 2006-2021 John MacFarlane     License     : GNU GPL, version 2 or above     Maintainer  : John MacFarlane <jgm@berkeley.edu> @@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing    , rawLaTeXParser    , applyMacros    , tokenize +  , tokenizeSources +  , getInputTokens    , untokenize    , untoken    , totoks    , toksToString    , satisfyTok +  , parseFromToks +  , disablingWithRaw    , doMacros    , doMacros'    , setpos @@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing    , comment    , anyTok    , singleChar +  , tokWith    , specialChars    , endline    , blankline @@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing    , rawopt    , overlaySpecification    , getNextNumber +  , label +  , setCaption +  , resetCaption +  , env +  , addMeta    ) where  import Control.Applicative (many, (<|>)) @@ -87,13 +98,15 @@ import Control.Monad.Trans (lift)  import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)  import Data.Default  import Data.List (intercalate) +import qualified Data.IntMap as IntMap  import qualified Data.Map as M  import qualified Data.Set as Set  import Data.Text (Text)  import qualified Data.Text as T  import Text.Pandoc.Builder  import Text.Pandoc.Class.PandocMonad (PandocMonad, report) -import Text.Pandoc.Error (PandocError (PandocMacroLoop)) +import Text.Pandoc.Error +         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))  import Text.Pandoc.Logging  import Text.Pandoc.Options  import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, @@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),                                          ArgSpec (..), Tok (..), TokType (..))  import Text.Pandoc.Shared  import Text.Parsec.Pos --- import Debug.Trace  newtype DottedNum = DottedNum [Int]    deriving (Show, Eq) @@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions                              , sLabels        :: M.Map Text [Inline]                              , sHasChapters   :: Bool                              , sToggles       :: M.Map Text Bool -                            , sExpanded      :: Bool +                            , sFileContents  :: M.Map Text Text +                            , sEnableWithRaw :: Bool +                            , sRawTokens     :: IntMap.IntMap [Tok]                              }       deriving Show @@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions       = def                                , sLabels        = M.empty                                , sHasChapters   = False                                , sToggles       = M.empty -                              , sExpanded      = False +                              , sFileContents  = M.empty +                              , sEnableWithRaw = True +                              , sRawTokens     = IntMap.empty                                }  instance PandocMonad m => HasQuoteContext LaTeXState m where @@ -232,21 +248,25 @@ withVerbatimMode parser = do         updateState $ \st -> st{ sVerbatimMode = False }         return result -rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)                 => [Tok] -> Bool -> LP m a -> LP m a -               -> ParserT Text s m (a, Text) +               -> ParserT Sources s m (a, Text)  rawLaTeXParser toks retokenize parser valParser = do    pstate <- getState    let lstate = def{ sOptions = extractReaderOptions pstate }    let lstate' = lstate { sMacros = extractMacros pstate } +  let setStartPos = case toks of +                      Tok pos _ _ : _ -> setPosition pos +                      _ -> return () +  let preparser = setStartPos >> parser    let rawparser = (,) <$> withRaw valParser <*> getState -  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks +  res' <- lift $ runParserT (withRaw (preparser >> getPosition)) +                            lstate "chunk" toks    case res' of         Left _    -> mzero -       Right toks' -> do +       Right (endpos, toks') -> do           res <- lift $ runParserT (do when retokenize $ do                                          -- retokenize, applying macros -                                        doMacros                                          ts <- many (satisfyTok (const True))                                          setInput ts                                        rawparser) @@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do                Left _    -> mzero                Right ((val, raw), st) -> do                  updateState (updateMacros (sMacros st <>)) -                _ <- takeP (T.length (untokenize toks')) +                let skipTilPos stopPos = do +                      anyChar +                      pos <- getPosition +                      if pos >= stopPos +                         then return () +                         else skipTilPos stopPos +                skipTilPos endpos                  let result = untokenize raw                  -- ensure we end with space if input did, see #4442                  let result' = @@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do                  return (val, result')  applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) -            => Text -> ParserT Text s m Text +            => Text -> ParserT Sources s m Text  applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>     do let retokenize = untokenize <$> many (satisfyTok (const True))        pstate <- getState @@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>             Left e   -> Prelude.fail (show e)             Right s' -> return s' +{- +When tokenize or untokenize change, test with this +QuickCheck property: + +> tokUntokRoundtrip :: String -> Bool +> tokUntokRoundtrip s = +>   let t = T.pack s in untokenize (tokenize "random" t) == t +-} + +tokenizeSources :: Sources -> [Tok] +tokenizeSources = concatMap tokenizeSource . unSources + where +   tokenizeSource (pos, t) = totoks pos t + +-- Return tokens from input sources. Ensure that starting position is +-- correct. +getInputTokens :: PandocMonad m => ParserT Sources s m [Tok] +getInputTokens = do +  pos <- getPosition +  ss <- getInput +  return $ +    case ss of +      Sources [] -> [] +      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest) +  tokenize :: SourceName -> Text -> [Tok]  tokenize sourcename = totoks (initialPos sourcename) @@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty  toksToString :: [Tok] -> String  toksToString = T.unpack . untokenize +parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a +parseFromToks parser toks = do +  oldInput <- getInput +  setInput toks +  oldpos <- getPosition +  case toks of +     Tok pos _ _ : _ -> setPosition pos +     _ -> return () +  result <- disablingWithRaw parser +  setInput oldInput +  setPosition oldpos +  return result + +disablingWithRaw :: PandocMonad m => LP m a -> LP m a +disablingWithRaw parser = do +  oldEnableWithRaw <- sEnableWithRaw <$> getState +  updateState $ \st -> st{ sEnableWithRaw = False } +  result <- parser +  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw } +  return result +  satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok  satisfyTok f = do      doMacros -- apply macros on remaining input stream      res <- tokenPrim (T.unpack . untoken) updatePos matcher -    updateState $ \st -> st{ sExpanded = False } -    return res +    updateState $ \st -> +      if sEnableWithRaw st +         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st } +         else st +    return $! res    where matcher t | f t       = Just t                    | otherwise = Nothing          updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos          updatePos _spos _ (Tok pos _ _ : _) = pos -        updatePos spos _ []                 = incSourceColumn spos 1 +        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)  doMacros :: PandocMonad m => LP m ()  doMacros = do -  expanded <- sExpanded <$> getState -  verbatimMode <- sVerbatimMode <$> getState -  unless (expanded || verbatimMode) $ do -      getInput >>= doMacros' 1 >>= setInput -      updateState $ \st -> st{ sExpanded = True } +  st <- getState +  unless (sVerbatimMode st) $ +    getInput >>= doMacros' 1 >>= setInput  doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]  doMacros' n inp =    case inp of       Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :        Tok _ Word name : Tok _ Symbol "}" : ts -        -> handleMacros n spos name ts +        -> handleMacros n spos name ts <|> return inp       Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :        Tok _ Word name : Tok _ Symbol "}" : ts -        -> handleMacros n spos ("end" <> name) ts +        -> handleMacros n spos ("end" <> name) ts <|> return inp       Tok _ (CtrlSeq "expandafter") _ : t : ts          -> combineTok t <$> doMacros' n ts       Tok spos (CtrlSeq name) _ : ts -        -> handleMacros n spos name ts +        -> handleMacros n spos name ts <|> return inp       _ -> return inp -   <|> return inp    where      combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) @@ -482,7 +554,7 @@ doMacros' n inp =          $ throwError $ PandocMacroLoop name        macros <- sMacros <$> getState        case M.lookup name macros of -           Nothing -> mzero +           Nothing -> trySpecialMacro name ts             Just (Macro expansionPoint argspecs optarg newtoks) -> do               let getargs' = do                     args <- @@ -510,6 +582,41 @@ doMacros' n inp =                     ExpandWhenUsed    -> doMacros' (n' + 1) result                     ExpandWhenDefined -> return result +-- | Certain macros do low-level tex manipulations that can't +-- be represented in our Macro type, so we handle them here. +trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok] +trySpecialMacro "xspace" ts = do +  ts' <- doMacros' 1 ts +  case ts' of +    Tok pos Word t : _ +      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts' +    _ -> return ts' +trySpecialMacro "iftrue" ts = handleIf True ts +trySpecialMacro "iffalse" ts = handleIf False ts +trySpecialMacro _ _ = mzero + +handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok] +handleIf b ts = do +  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts +  case res' of +    Left _ -> Prelude.fail "Could not parse conditional" +    Right ts' -> return ts' + +ifParser :: PandocMonad m => Bool -> LP m [Tok] +ifParser b = do +  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi") +                    *> anyTok) +  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi")) +                 <|> ([] <$ controlSeq "fi") +  rest <- getInput +  return $ (if b then ifToks else elseToks) ++ rest + +startsWithAlphaNum :: Text -> Bool +startsWithAlphaNum t = +  case T.uncons t of +       Just (c, _) | isAlphaNum c -> True +       _           -> False +  setpos :: SourcePos -> Tok -> Tok  setpos spos (Tok _ tt txt) = Tok spos tt txt @@ -592,18 +699,22 @@ isCommentTok _                 = False  anyTok :: PandocMonad m => LP m Tok  anyTok = satisfyTok (const True) +singleCharTok :: PandocMonad m => LP m Tok +singleCharTok = +  satisfyTok $ \case +     Tok _ Word  t   -> T.length t == 1 +     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t) +     _               -> False +  singleChar :: PandocMonad m => LP m Tok -singleChar = try $ do -  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) -  guard $ not $ toktype == Symbol && -                T.any (`Set.member` specialChars) t -  if T.length t > 1 -     then do -       let (t1, t2) = (T.take 1 t, T.drop 1 t) -       inp <- getInput -       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp -       return $ Tok pos toktype t1 -     else return $ Tok pos toktype t +singleChar = singleCharTok <|> singleCharFromWord + where +  singleCharFromWord = do +    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok +    let (t1, t2) = (T.take 1 t, T.drop 1 t) +    inp <- getInput +    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp +    anyTok  specialChars :: Set.Set Char  specialChars = Set.fromList "#$%&~_^\\{}" @@ -646,28 +757,25 @@ grouped parser = try $ do    -- {{a,b}} should be parsed the same as {a,b}    try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) -braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok] -braced' getTok n = -  handleEgroup <|> handleBgroup <|> handleOther -  where handleEgroup = do -          t <- symbol '}' -          if n == 1 -             then return [] -             else (t:) <$> braced' getTok (n - 1) -        handleBgroup = do -          t <- symbol '{' -          (t:) <$> braced' getTok (n + 1) -        handleOther = do -          t <- getTok -          (t:) <$> braced' getTok n +braced' :: PandocMonad m => LP m Tok -> LP m [Tok] +braced' getTok = symbol '{' *> go (1 :: Int) + where +  go n = do +    t <- getTok +    case t of +      Tok _ Symbol "}" +        | n > 1     -> (t:) <$> go (n - 1) +        | otherwise -> return [] +      Tok _ Symbol "{" -> (t:) <$> go (n + 1) +      _ -> (t:) <$> go n  braced :: PandocMonad m => LP m [Tok] -braced = symbol '{' *> braced' anyTok 1 +braced = braced' anyTok  -- URLs require special handling, because they can contain %  -- characters.  So we retonenize comments as we go...  bracedUrl :: PandocMonad m => LP m [Tok] -bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1 +bracedUrl = braced' (retokenizeComment >> anyTok)  -- For handling URLs, which allow literal % characters...  retokenizeComment :: PandocMonad m => LP m () @@ -723,16 +831,29 @@ ignore raw = do  withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])  withRaw parser = do -  inp <- getInput +  rawTokensMap <- sRawTokens <$> getState +  let key = case IntMap.lookupMax rawTokensMap of +               Nothing     -> 0 +               Just (n,_)  -> n + 1 +  -- insert empty list at key +  updateState $ \st -> st{ sRawTokens = +                             IntMap.insert key [] $ sRawTokens st }    result <- parser -  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok) -  let raw = takeWhile (\(Tok pos _ _) -> maybe True -                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp +  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState +  raw <- case mbRevToks of +           Just revtoks -> do +             updateState $ \st -> st{ sRawTokens = +                                        IntMap.delete key $ sRawTokens st} +             return $ reverse revtoks +           Nothing      -> +             throwError $ PandocShouldNeverHappenError $ +                "sRawTokens has nothing at key " <> T.pack (show key)    return (result, raw)  keyval :: PandocMonad m => LP m (Text, Text)  keyval = try $ do -  Tok _ Word key <- satisfyTok isWordTok +  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >> +                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))    sp    val <- option mempty $ do             symbol '=' @@ -792,7 +913,7 @@ getRawCommand name txt = do    (_, rawargs) <- withRaw $        case name of             "write" -> do -             void $ satisfyTok isWordTok -- digits +             void $ many $ satisfyTok isDigitTok -- digits               void braced             "titleformat" -> do               void braced @@ -807,6 +928,10 @@ getRawCommand name txt = do                 void $ many braced    return $ txt <> untokenize rawargs +isDigitTok :: Tok -> Bool +isDigitTok (Tok _ Word t) = T.all isDigit t +isDigitTok _              = False +  skipopts :: PandocMonad m => LP m ()  skipopts = skipMany (void overlaySpecification <|> void rawopt) @@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do                 Just n  -> [n, 1]                 Nothing -> [1] +label :: PandocMonad m => LP m () +label = do +  controlSeq "label" +  t <- braced +  updateState $ \st -> st{ sLastLabel = Just $ untokenize t } + +setCaption :: PandocMonad m => LP m Inlines -> LP m () +setCaption inline = try $ do +  skipopts +  ils <- tokWith inline +  optional $ try $ spaces *> label +  updateState $ \st -> st{ sCaption = Just ils } + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption   = Nothing +                                      , sLastLabel = Nothing } + +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name + +tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines +tokWith inlineParser = try $ spaces >> +                                 grouped inlineParser +                            <|> (lookAhead anyControlSeq >> inlineParser) +                            <|> singleChar' +  where singleChar' = do +          Tok _ _ t <- singleChar +          return $ str t + +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () +addMeta field val = updateState $ \st -> +   st{ sMeta = addMetaField field val $ sMeta st } diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index db9c276e7..b8bf0ce7f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -1,12 +1,7 @@ +{-# LANGUAGE ScopedTypeVariables #-}  {-# LANGUAGE OverloadedStrings #-}  module Text.Pandoc.Readers.LaTeX.SIunitx -  ( dosi -  , doSI -  , doSIrange -  , doSInum -  , doSInumlist -  , doSIang -  ) +  ( siunitxCommands )  where  import Text.Pandoc.Builder  import Text.Pandoc.Readers.LaTeX.Parsing @@ -15,14 +10,32 @@ import Text.Pandoc.Class  import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,                              optional, space, spaces, withRaw, (<|>))  import Control.Applicative ((<|>)) +import Control.Monad (void)  import qualified Data.Map as M  import Data.Char (isDigit)  import Data.Text (Text)  import qualified Data.Text as T  import Data.List (intersperse) +import qualified Data.Sequence as Seq +import Text.Pandoc.Walk (walk) + +siunitxCommands :: PandocMonad m +                 => LP m Inlines -> M.Map Text (LP m Inlines) +siunitxCommands tok = M.fromList +  [ ("si", dosi tok) +  , ("SI", doSI tok) +  , ("SIrange", doSIrange True tok) +  , ("numrange", doSIrange False tok) +  , ("numlist", doSInumlist) +  , ("SIlist", doSIlist tok) +  , ("num", doSInum) +  , ("ang", doSIang) +  ]  dosi :: PandocMonad m => LP m Inlines -> LP m Inlines -dosi tok = grouped (siUnit tok) <|> siUnit tok +dosi tok = do +  options <- option [] keyvals +  grouped (siUnit options tok) <|> siUnit options tok  -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"  doSI :: PandocMonad m => LP m Inlines -> LP m Inlines @@ -57,23 +70,50 @@ doSInumlist = do               mconcat (intersperse (str "," <> space) (init xs)) <>               text ", & " <> last xs +doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines +doSIlist tok = do +  options <- option [] keyvals +  nums <- map tonum . T.splitOn ";" . untokenize <$> braced +  unit <- grouped (siUnit options tok) <|> siUnit options tok +  let xs = map (<> (str "\xa0" <> unit)) nums +  case xs of +    []  -> return mempty +    [x] -> return x +    _   -> return $ +             mconcat (intersperse (str "," <> space) (init xs)) <> +             text ", & " <> last xs +  parseNum :: Parser Text () Inlines  parseNum = (mconcat <$> many parseNumPart) <* eof +minus :: Text +minus = "\x2212" + +hyphenToMinus :: Inline -> Inline +hyphenToMinus (Str t) = Str (T.replace "-" minus t) +hyphenToMinus x = x +  parseNumPart :: Parser Text () Inlines  parseNumPart =    parseDecimalNum <|>    parseComma <|>    parsePlusMinus <|> +  parsePM <|>    parseI <|>    parseExp <|>    parseX <|>    parseSpace   where -  parseDecimalNum = do -    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') -    basenum <- (pref <>) . T.pack -                <$> many1 (satisfy (\c -> isDigit c || c == '.')) +  parseDecimalNum, parsePlusMinus, parsePM, +    parseComma, parseI, parseX, +    parseExp, parseSpace :: Parser Text () Inlines +  parseDecimalNum = try $ do +    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-') +    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) +    let basenum = pref <> T.pack +                    (case basenum' of +                      '.':_ -> '0':basenum' +                      _ -> basenum')      uncertainty <- option mempty $ T.pack <$> parseParens      if T.null uncertainty         then return $ str basenum @@ -91,6 +131,7 @@ parseNumPart =                                                 | otherwise -> "." <> t    parseComma = str "." <$ char ','    parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-") +  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")    parseParens =      char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'    parseI = str "i" <$ char 'i' @@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines  doSIang = do    skipopts    ps <- T.splitOn ";" . untokenize <$> braced +  let dropPlus t = case T.uncons t of +                     Just ('+',t') -> t' +                     _ -> t    case ps ++ repeat "" of      (d:m:s:_) -> return $ -      (if T.null d then mempty else str d <> str "\xb0") <> -      (if T.null m then mempty else str m <> str "\x2032") <> -      (if T.null s then mempty else str s <> str "\x2033") +      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <> +      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <> +      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")      _ -> return mempty  -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms" @@ -136,40 +180,99 @@ doSIrange includeUnits tok = do  emptyOr160 :: Inlines -> Inlines  emptyOr160 x = if x == mempty then x else str "\160" -siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines -siUnit tok = try (do -  Tok _ (CtrlSeq name) _ <- anyControlSeq -  case name of -    "square" -> do -       unit <- siUnit tok -       return $ unit <> superscript "2" -    "cubic" -> do -       unit <- siUnit tok -       return $ unit <> superscript "3" -    "raisetothe" -> do -       n <- tok -       unit <- siUnit tok -       return $ unit <> superscript n -    _ -> -       case M.lookup name siUnitMap of -            Just il -> -              option il $ -                choice -                 [ (il <> superscript "2") <$ controlSeq "squared" -                 , (il <> superscript "3") <$ controlSeq "cubed" -                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok) -                 ] -            Nothing -> fail "not an siunit unit command") - <|> (lookAhead anyControlSeq >> tok) - <|> (do Tok _ Word t <- satisfyTok isWordTok -         return $ str t) - <|> (symbol '^' *> (superscript <$> tok)) - <|> (symbol '_' *> (subscript <$> tok)) - <|> ("\xa0" <$ symbol '.') - <|> ("\xa0" <$ symbol '~') - <|> tok - <|> (do Tok _ _ t <- anyTok -         return (str t)) +siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines +siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart + where +  siUnitPart :: LP m Inlines +  siUnitPart = try $ do +    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1) +    x <- ((siPrefix <*> siBase) +            <|> (do u <- siBase <|> tok +                    option u $ siSuffix <*> pure u)) +    option x (siInfix x) +  siInfix :: Inlines -> LP m Inlines +  siInfix u1 = try $ +       (do _ <- controlSeq "per" +           u2 <- siUnitPart +           let useSlash = lookup "per-mode" options == Just "symbol" +           if useSlash +              then return (u1 <> str "/" <> u2) +              else return (u1 <> str "\xa0" <> negateExponent u2)) +   <|> (do _ <- symbol '/' +           u2 <- siUnitPart +           return (u1 <> str "/" <> u2)) +  siPrefix :: LP m (Inlines -> Inlines) +  siPrefix = +       (do _ <- controlSeq "square" +           skipopts +           return (<> superscript "2")) +   <|> (do _ <- controlSeq "cubic" +           skipopts +           return (<> superscript "3")) +   <|> (do _ <- controlSeq "raisetothe" +           skipopts +           n <- walk hyphenToMinus <$> tok +           return (<> superscript n)) +  siSuffix :: LP m (Inlines -> Inlines) +  siSuffix = +       (do _ <- controlSeq "squared" +           skipopts +           return (<> superscript "2")) +   <|> (do _ <- controlSeq "cubed" +           skipopts +           return (<> superscript "3")) +   <|> (do _ <- controlSeq "tothe" +           skipopts +           n <- walk hyphenToMinus <$> tok +           return (<> superscript n)) +   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok +                          return (<> superscript n))) +   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok +                          return (<> subscript n))) +  negateExponent :: Inlines -> Inlines +  negateExponent ils = +    case Seq.viewr (unMany ils) of +      xs Seq.:> Superscript ss -> (Many xs) <> +                                     superscript (str minus <> fromList ss) +      _ -> ils <> superscript (str (minus <> "1")) +  siBase :: LP m Inlines +  siBase = +    ((try +       (do Tok _ (CtrlSeq name) _ <- anyControlSeq +           case M.lookup name siUnitModifierMap of +              Just il -> (il <>) <$> siBase +              Nothing -> +                case M.lookup name siUnitMap of +                   Just il -> pure il +                   Nothing -> fail "not a unit command")) +    <|> (do Tok _ Word t <- satisfyTok isWordTok +            return $ str t) +     ) + +siUnitModifierMap :: M.Map Text Inlines +siUnitModifierMap = M.fromList +  [ ("atto", str "a") +  , ("centi", str "c") +  , ("deca", str "d") +  , ("deci", str "d") +  , ("deka", str "d") +  , ("exa", str "E") +  , ("femto", str "f") +  , ("giga", str "G") +  , ("hecto", str "h") +  , ("kilo", str "k") +  , ("mega", str "M") +  , ("micro", str "μ") +  , ("milli", str "m") +  , ("nano", str "n") +  , ("peta", str "P") +  , ("pico", str "p") +  , ("tera", str "T") +  , ("yocto", str "y") +  , ("yotta", str "Y") +  , ("zepto", str "z") +  , ("zetta", str "Z") +  ]  siUnitMap :: M.Map Text Inlines  siUnitMap = M.fromList @@ -269,7 +372,6 @@ siUnitMap = M.fromList    , ("arcsecond", str "″")    , ("astronomicalunit", str "ua")    , ("atomicmassunit", str "u") -  , ("atto", str "a")    , ("bar", str "bar")    , ("barn", str "b")    , ("becquerel", str "Bq") @@ -277,51 +379,38 @@ siUnitMap = M.fromList    , ("bohr", emph (str "a") <> subscript (str "0"))    , ("candela", str "cd")    , ("celsius", str "°C") -  , ("centi", str "c")    , ("clight", emph (str "c") <> subscript (str "0"))    , ("coulomb", str "C")    , ("dalton", str "Da")    , ("day", str "d") -  , ("deca", str "d") -  , ("deci", str "d")    , ("decibel", str "db")    , ("degreeCelsius",str "°C")    , ("degree", str "°") -  , ("deka", str "d")    , ("electronmass", emph (str "m") <> subscript (str "e"))    , ("electronvolt", str "eV")    , ("elementarycharge", emph (str "e")) -  , ("exa", str "E")    , ("farad", str "F") -  , ("femto", str "f") -  , ("giga", str "G")    , ("gram", str "g")    , ("gray", str "Gy")    , ("hartree", emph (str "E") <> subscript (str "h"))    , ("hectare", str "ha") -  , ("hecto", str "h")    , ("henry", str "H")    , ("hertz", str "Hz")    , ("hour", str "h")    , ("joule", str "J")    , ("katal", str "kat")    , ("kelvin", str "K") -  , ("kilo", str "k")    , ("kilogram", str "kg")    , ("knot", str "kn")    , ("liter", str "L")    , ("litre", str "l")    , ("lumen", str "lm")    , ("lux", str "lx") -  , ("mega", str "M")    , ("meter", str "m")    , ("metre", str "m") -  , ("micro", str "μ") -  , ("milli", str "m")    , ("minute", str "min")    , ("mmHg", str "mmHg")    , ("mole", str "mol") -  , ("nano", str "n")    , ("nauticalmile", str "M")    , ("neper", str "Np")    , ("newton", str "N") @@ -329,25 +418,17 @@ siUnitMap = M.fromList    , ("Pa", str "Pa")    , ("pascal", str "Pa")    , ("percent", str "%") -  , ("per", str "/") -  , ("peta", str "P") -  , ("pico", str "p")    , ("planckbar", emph (str "\x210f"))    , ("radian", str "rad")    , ("second", str "s")    , ("siemens", str "S")    , ("sievert", str "Sv")    , ("steradian", str "sr") -  , ("tera", str "T")    , ("tesla", str "T")    , ("tonne", str "t")    , ("volt", str "V")    , ("watt", str "W")    , ("weber", str "Wb") -  , ("yocto", str "y") -  , ("yotta", str "Y") -  , ("zepto", str "z") -  , ("zetta", str "Z")    ] diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs new file mode 100644 index 000000000..f56728fe1 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -0,0 +1,379 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Table +  ( tableEnvironments ) +where + +import Data.Functor (($>)) +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import qualified Data.Text as T +import Control.Applicative ((<|>), optional, many) +import Control.Monad (when, void) +import Text.Pandoc.Shared (safeRead, trim) +import Text.Pandoc.Logging (LogMessage(SkippedContent)) +import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, +                            optional, space, spaces, withRaw, (<|>)) + +tableEnvironments :: PandocMonad m +                  => LP m Blocks +                  -> LP m Inlines +                  -> M.Map Text (LP m Blocks) +tableEnvironments blocks inline = +  M.fromList +  [ ("longtable",  env "longtable" $ +          resetCaption *> +            simpTable blocks inline "longtable" False >>= addTableCaption) +  , ("table",  env "table" $ +          skipopts *> resetCaption *> blocks >>= addTableCaption) +  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True) +  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True) +  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False) +  ] + +hline :: PandocMonad m => LP m () +hline = try $ do +  spaces +  controlSeq "hline" <|> +    (controlSeq "cline" <* braced) <|> +    -- booktabs rules: +    controlSeq "toprule" <|> +    controlSeq "bottomrule" <|> +    controlSeq "midrule" <|> +    controlSeq "endhead" <|> +    controlSeq "endfirsthead" +  spaces +  optional rawopt +  return () + +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") +         <* skipopts <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' + +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do +  inp <- getInput +  case inp of +       (Tok spos Word t : rest) -> +         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest +       _ -> return () + +parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] +parseAligns = try $ do +  let maybeBar = skipMany +        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) +  let cAlign = AlignCenter <$ symbol 'c' +  let lAlign = AlignLeft <$ symbol 'l' +  let rAlign = AlignRight <$ symbol 'r' +  let parAlign = AlignLeft <$ symbol 'p' +  -- aligns from tabularx +  let xAlign = AlignLeft <$ symbol 'X' +  let mAlign = AlignLeft <$ symbol 'm' +  let bAlign = AlignLeft <$ symbol 'b' +  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign +                                 <|> xAlign <|> mAlign <|> bAlign ) +  let alignPrefix = symbol '>' >> braced +  let alignSuffix = symbol '<' >> braced +  let colWidth = try $ do +        symbol '{' +        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") +        spaces +        symbol '}' +        return $ safeRead ds +  let alignSpec = do +        pref <- option [] alignPrefix +        spaces +        al <- alignChar +        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced +                                                 pos <- getPosition +                                                 report $ SkippedContent s pos +                                                 return Nothing) +        spaces +        suff <- option [] alignSuffix +        return (al, width, (pref, suff)) +  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro +        symbol '*' +        spaces +        ds <- trim . untokenize <$> braced +        spaces +        spec <- braced +        case safeRead ds of +             Just n  -> +               getInput >>= setInput . (mconcat (replicate n spec) ++) +             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" +  bgroup +  spaces +  maybeBar +  aligns' <- many $ try $ spaces >> optional starAlign >> +                            (alignSpec <* maybeBar) +  spaces +  egroup +  spaces +  return $ map toSpec aligns' +  where +    toColWidth (Just w) | w > 0 = ColWidth w +    toColWidth _                = ColWidthDefault +    toSpec (x, y, z) = (x, toColWidth y, z) + +-- N.B. this parser returns a Row that may have erroneous empty cells +-- in it. See the note above fixTableHead for details. +parseTableRow :: PandocMonad m +              => LP m Blocks -- ^ block parser +              -> LP m Inlines -- ^ inline parser +              -> Text   -- ^ table environment name +              -> [([Tok], [Tok])] -- ^ pref/suffixes +              -> LP m Row +parseTableRow blocks inline envname prefsufs = do +  notFollowedBy (spaces *> end_ envname) +  -- contexts that can contain & that is not colsep: +  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True +      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True +      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True +      canContainAmp _       = False +  -- add prefixes and suffixes in token stream: +  let celltoks (pref, suff) = do +        prefpos <- getPosition +        contents <- mconcat <$> +            many ( snd <$> withRaw +                     ((lookAhead (controlSeq "parbox") >> +                       void blocks) -- #5711 +                      <|> +                      (lookAhead (satisfyTok canContainAmp) >> void inline) +                      <|> +                      (lookAhead (symbol '$') >> void inline)) +                  <|> +                   (do notFollowedBy +                         (() <$ amp <|> () <$ lbreak <|> end_ envname) +                       count 1 anyTok) ) + +        suffpos <- getPosition +        option [] (count 1 amp) +        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff +  rawcells <- mapM celltoks prefsufs +  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells +  spaces +  return $ Row nullAttr cells + +parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell +parseTableCell blocks = do +  spaces +  updateState $ \st -> st{ sInTableCell = True } +  cell' <-   multicolumnCell blocks +         <|> multirowCell blocks +         <|> parseSimpleCell +         <|> parseEmptyCell +  updateState $ \st -> st{ sInTableCell = False } +  spaces +  return cell' +  where +    -- The parsing of empty cells is important in LaTeX, especially when dealing +    -- with multirow/multicolumn. See #6603. +    parseEmptyCell = spaces $> emptyCell +    parseSimpleCell = simpleCell <$> (plainify <$> blocks) + + +cellAlignment :: PandocMonad m => LP m Alignment +cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') +  where +    alignment = do +      c <- untoken <$> singleChar +      return $ case c of +        "l" -> AlignLeft +        "r" -> AlignRight +        "c" -> AlignCenter +        "*" -> AlignDefault +        _   -> AlignDefault + +plainify :: Blocks -> Blocks +plainify bs = case toList bs of +                [Para ils] -> plain (fromList ils) +                _          -> bs + +multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell +multirowCell blocks = controlSeq "multirow" >> do +  -- Full prototype for \multirow macro is: +  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} +  -- However, everything except `nrows` and `text` make +  -- sense in the context of the Pandoc AST +  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position +  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced +  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related +  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width +  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning +  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' +  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content + +multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell +multicolumnCell blocks = controlSeq "multicolumn" >> do +  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced +  alignment <- symbol '{' *> cellAlignment <* symbol '}' + +  let singleCell = do +        content <- plainify <$> blocks +        return $ cell alignment (RowSpan 1) (ColSpan span') content + +  -- Two possible contents: either a \multirow cell, or content. +  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} +  -- Note that a \multirow cell can be nested in a \multicolumn, +  -- but not the other way around. See #6603 +  let nestedCell = do +        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks +        return $ cell +                  alignment +                  (RowSpan rs) +                  (ColSpan span') +                  (fromList bs) + +  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' + +-- LaTeX tables are stored with empty cells underneath multirow cells +-- denoting the grid spaces taken up by them. More specifically, if a +-- cell spans m rows, then it will overwrite all the cells in the +-- columns it spans for (m-1) rows underneath it, requiring padding +-- cells in these places. These padding cells need to be removed for +-- proper table reading. See #6603. +-- +-- These fixTable functions do not otherwise fix up malformed +-- input tables: that is left to the table builder. +fixTableHead :: TableHead -> TableHead +fixTableHead (TableHead attr rows) = TableHead attr rows' +  where +    rows' = fixTableRows rows + +fixTableBody :: TableBody -> TableBody +fixTableBody (TableBody attr rhc th tb) +  = TableBody attr rhc th' tb' +  where +    th' = fixTableRows th +    tb' = fixTableRows tb + +fixTableRows :: [Row] -> [Row] +fixTableRows = fixTableRows' $ repeat Nothing +  where +    fixTableRows' oldHang (Row attr cells : rs) +      = let (newHang, cells') = fixTableRow oldHang cells +            rs'               = fixTableRows' newHang rs +        in Row attr cells' : rs' +    fixTableRows' _ [] = [] + +-- The overhang is represented as Just (relative cell dimensions) or +-- Nothing for an empty grid space. +fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) +fixTableRow oldHang cells +  -- If there's overhang, drop cells until their total width meets the +  -- width of the occupied grid spaces (or we run out) +  | (n, prefHang, restHang) <- splitHang oldHang +  , n > 0 +  = let cells' = dropToWidth getCellW n cells +        (restHang', cells'') = fixTableRow restHang cells' +    in (prefHang restHang', cells'') +  -- Otherwise record the overhang of a pending cell and fix the rest +  -- of the row +  | c@(Cell _ _ h w _):cells' <- cells +  = let h' = max 1 h +        w' = max 1 w +        oldHang' = dropToWidth getHangW w' oldHang +        (newHang, cells'') = fixTableRow oldHang' cells' +    in (toHang w' h' <> newHang, c : cells'') +  | otherwise +  = (oldHang, []) +  where +    getCellW (Cell _ _ _ w _) = w +    getHangW = maybe 1 fst +    getCS (ColSpan n) = n + +    toHang c r +      | r > 1     = [Just (c, r)] +      | otherwise = replicate (getCS c) Nothing + +    -- Take the prefix of the overhang list representing filled grid +    -- spaces. Also return the remainder and the length of this prefix. +    splitHang = splitHang' 0 id + +    splitHang' !n l (Just (c, r):xs) +      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs +    splitHang' n l xs = (n, l, xs) + +    -- Drop list items until the total width of the dropped items +    -- exceeds the passed width. +    dropToWidth _     n l | n < 1 = l +    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs +    dropToWidth _     _ []        = [] + +simpTable :: PandocMonad m +          => LP m Blocks +          -> LP m Inlines +          -> Text +          -> Bool +          -> LP m Blocks +simpTable blocks inline envname hasWidthParameter = try $ do +  when hasWidthParameter $ () <$ tokWith inline +  skipopts +  colspecs <- parseAligns +  let (aligns, widths, prefsufs) = unzip3 colspecs +  optional $ controlSeq "caption" *> setCaption inline +  spaces +  optional label +  spaces +  optional lbreak +  spaces +  skipMany hline +  spaces +  header' <- option [] . try . fmap (:[]) $ +             parseTableRow blocks inline envname prefsufs <* +               lbreak <* many1 hline +  spaces +  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs) +                    (lbreak <* optional (skipMany hline)) +  spaces +  optional $ controlSeq "caption" *> setCaption inline +  spaces +  optional label +  spaces +  optional lbreak +  spaces +  lookAhead $ controlSeq "end" -- make sure we're at end +  let th  = fixTableHead $ TableHead nullAttr header' +  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] +  let tf  = TableFoot nullAttr [] +  return $ table emptyCaption (zip aligns widths) th tbs tf + +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go +  where go (Table attr c spec th tb tf) = do +          st <- getState +          let mblabel = sLastLabel st +          capt <- case (sCaption st, mblabel) of +                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils) +                   (Just ils, Just lab) -> do +                     num <- getNextNumber sLastTableNum +                     setState +                       st{ sLastTableNum = num +                         , sLabels = M.insert lab +                                    [Str (renderDottedNum num)] +                                    (sLabels st) } +                     return $ caption Nothing (plain ils) -- add number?? +                   (Nothing, _)  -> return c +          let attr' = case (attr, mblabel) of +                        ((_,classes,kvs), Just ident) -> +                           (ident,classes,kvs) +                        _ -> attr +          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf +        go x = return x + +-- TODO: For now we add a Div to contain table attributes, since +-- most writers don't do anything yet with attributes on Table. +-- This can be removed when that changes. +addAttrDiv :: Attr -> Block -> Block +addAttrDiv ("",[],[]) b = b +addAttrDiv attr b       = Div attr [b] diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index a017a2afb..c20b72bc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,6 +1,7 @@ +{-# LANGUAGE FlexibleInstances #-}  {- |     Module      : Text.Pandoc.Readers.LaTeX.Types -   Copyright   : Copyright (C) 2017-2020 John MacFarlane +   Copyright   : Copyright (C) 2017-2021 John MacFarlane     License     : GNU GPL, version 2 or above     Maintainer  : John MacFarlane <jgm@berkeley.edu> @@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)                                         )  where  import Data.Text (Text) -import Text.Parsec.Pos (SourcePos) +import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Pandoc.Sources +import Data.List (groupBy)  data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |                 Esc1    | Esc2   | Arg Int @@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |  data Tok = Tok SourcePos TokType Text       deriving (Eq, Ord, Show) +instance ToSources [Tok] where +  toSources = Sources +    . map (\ts -> case ts of +                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts) +                    _ -> error "toSources [Tok] encountered empty group") +    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2) + +tokToText :: Tok -> Text +tokToText (Tok _ _ t) = t +  data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed       deriving (Eq, Ord, Show) | 
