diff options
| author | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:10:34 +0200 |
|---|---|---|
| committer | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:46:16 +0200 |
| commit | 48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch) | |
| tree | 1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Writers/LaTeX | |
| parent | 0c39509d9b6a58958228cebf5d643598e5c98950 (diff) | |
| parent | 46099e79defe662e541b12548200caf29063c1c6 (diff) | |
| download | pandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz | |
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Writers/LaTeX')
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Caption.hs | 47 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Citation.hs | 181 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Lang.hs | 192 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Notes.hs | 34 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Table.hs | 307 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Types.hs | 83 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Util.hs | 275 |
7 files changed, 1119 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs new file mode 100644 index 000000000..ab4d365cc --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs @@ -0,0 +1,47 @@ +{- | + Module : Text.Pandoc.Writers.LaTeX.Caption + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Write figure or table captions as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Caption + ( getCaption + ) where + +import Control.Monad.State.Strict +import Data.Monoid (Any(..)) +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout (Doc, brackets, empty) +import Text.Pandoc.Shared +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stExternalNotes, stNotes) ) + +getCaption :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Bool -> [Inline] + -> LW m (Doc Text, Doc Text, Doc Text) +getCaption inlineListToLaTeX externalNotes txt = do + oldExternalNotes <- gets stExternalNotes + modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] } + capt <- inlineListToLaTeX txt + footnotes <- if externalNotes + then notesToLaTeX <$> gets stNotes + else return empty + modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] } + -- We can't have footnotes in the list of figures/tables, so remove them: + let getNote (Note _) = Any True + getNote _ = Any False + let hasNotes = getAny . query getNote + captForLof <- if hasNotes txt + then brackets <$> inlineListToLaTeX (walk deNote txt) + else return empty + return (capt, captForLof, footnotes) diff --git a/src/Text/Pandoc/Writers/LaTeX/Citation.hs b/src/Text/Pandoc/Writers/LaTeX/Citation.hs new file mode 100644 index 000000000..f48a43d7a --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Citation.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Citation + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Citation + ( citationsToNatbib, + citationsToBiblatex + ) where + +import Data.Text (Text) +import Data.Char (isPunctuation) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Data.List (foldl') +import Text.DocLayout (Doc, brackets, empty, (<+>), text, isEmpty, literal, + braces) +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Types ( LW ) + +citationsToNatbib :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] + -> LW m (Doc Text) +citationsToNatbib inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX c p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = one + c = case m of + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" + NormalCitation -> "citep" + +citationsToNatbib inlineListToLaTeX cits + | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits + = citeCommand inlineListToLaTeX "citep" p s ks + where + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all ((==) m . citationMode) + p = citationPrefix $ + head cits + s = citationSuffix $ + last cits + ks = T.intercalate ", " $ map citationId cits + +citationsToNatbib inlineListToLaTeX (c:cs) + | citationMode c == AuthorInText = do + author <- citeCommand inlineListToLaTeX "citeauthor" [] [] (citationId c) + cits <- citationsToNatbib inlineListToLaTeX + (c { citationMode = SuppressAuthor } : cs) + return $ author <+> cits + +citationsToNatbib inlineListToLaTeX cits = do + cits' <- mapM convertOne cits + return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" + where + citeCommand' = citeCommand inlineListToLaTeX + combineTwo a b | isEmpty a = b + | otherwise = a <> text "; " <> b + convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = case m of + AuthorInText -> citeCommand' "citealt" p s k + SuppressAuthor -> citeCommand' "citeyear" p s k + NormalCitation -> citeCommand' "citealp" p s k + +citeCommand :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Text + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeCommand inlineListToLaTeX c p s k = do + args <- citeArguments inlineListToLaTeX p s k + return $ literal ("\\" <> c) <> args + +type Prefix = [Inline] +type Suffix = [Inline] +type CiteId = Text +data CiteGroup = CiteGroup Prefix Suffix [CiteId] + +citeArgumentsList :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> CiteGroup + -> LW m (Doc Text) +citeArgumentsList _inlineListToLaTeX (CiteGroup _ _ []) = return empty +citeArgumentsList inlineListToLaTeX (CiteGroup pfxs sfxs ids) = do + pdoc <- inlineListToLaTeX pfxs + sdoc <- inlineListToLaTeX sfxs' + return $ optargs pdoc sdoc <> + braces (literal (T.intercalate "," (reverse ids))) + where sfxs' = stripLocatorBraces $ case sfxs of + (Str t : r) -> case T.uncons t of + Just (x, xs) + | T.null xs + , isPunctuation x -> dropWhile (== Space) r + | isPunctuation x -> Str xs : r + _ -> sfxs + _ -> sfxs + optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of + (True, True ) -> empty + (True, False) -> brackets sdoc + (_ , _ ) -> brackets pdoc <> brackets sdoc + +citeArguments :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeArguments inlineListToLaTeX p s k = + citeArgumentsList inlineListToLaTeX (CiteGroup p s [k]) + +-- strip off {} used to define locator in pandoc-citeproc; see #5722 +stripLocatorBraces :: [Inline] -> [Inline] +stripLocatorBraces = walk go + where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs + go x = x + +citationsToBiblatex :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] -> LW m (Doc Text) +citationsToBiblatex inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX cmd p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } = one + cmd = case m of + SuppressAuthor -> "autocite*" + AuthorInText -> "textcite" + NormalCitation -> "autocite" + +citationsToBiblatex inlineListToLaTeX (c:cs) + | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocite*" + AuthorInText -> "\\textcite" + NormalCitation -> "\\autocite" + return $ text cmd <> + braces (literal (T.intercalate "," (map citationId (c:cs)))) + | otherwise + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocites*" + AuthorInText -> "\\textcites" + NormalCitation -> "\\autocites" + + groups <- mapM (citeArgumentsList inlineListToLaTeX) + (reverse (foldl' grouper [] (c:cs))) + + return $ text cmd <> mconcat groups + + where grouper prev cit = case prev of + ((CiteGroup oPfx oSfx ids):rest) + | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest + _ -> CiteGroup pfx sfx [cid] : prev + where pfx = citationPrefix cit + sfx = citationSuffix cit + cid = citationId cit + +citationsToBiblatex _ _ = return empty diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs new file mode 100644 index 000000000..0ba68b74e --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -0,0 +1,192 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Lang + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Lang + ( toPolyglossiaEnv, + toPolyglossia, + toBabel + ) where +import Data.Text (Text) +import Text.Collate.Lang (Lang(..)) + + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: Lang -> (Text, Text) +toPolyglossiaEnv l = + case toPolyglossia l of + ("arabic", o) -> ("Arabic", o) + x -> x + +-- Takes a list of the constituents of a BCP47 language code and +-- converts it to a Polyglossia (language, options) tuple +-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf +toPolyglossia :: Lang -> (Text, Text) +toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "JO") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LB") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LY") _ _ _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ (Just "MA") _ _ _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ (Just "MR") _ _ _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ (Just "PS") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "SY") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "TN") _ _ _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") + +-- Takes a list of the constituents of a BCP47 language code and +-- converts it to a Babel language string. +-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf +-- List of supported languages (slightly outdated): +-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf +toBabel :: Lang -> Text +toBabel (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" +toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" +toBabel (Lang "en" _ (Just "GB") _ _ _) = "british" +toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand" +toBabel (Lang "en" _ (Just "UK") _ _ _) = "british" +toBabel (Lang "en" _ (Just "US") _ _ _) = "american" +toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" +toBabel (Lang "fra" _ _ vars _ _) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian" +toBabel (Lang "sl" _ _ _ _ _) = "slovene" +toBabel x = commonFromBcp47 x + +-- Takes a list of the constituents of a BCP47 language code +-- and converts it to a string shared by Babel and Polyglossia. +-- https://tools.ietf.org/html/bcp47#section-2.1 +commonFromBcp47 :: Lang -> Text +commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = "serbianc" +commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l + where + fromIso "af" = "afrikaans" + fromIso "am" = "amharic" + fromIso "ar" = "arabic" + fromIso "as" = "assamese" + fromIso "ast" = "asturian" + fromIso "bg" = "bulgarian" + fromIso "bn" = "bengali" + fromIso "bo" = "tibetan" + fromIso "br" = "breton" + fromIso "ca" = "catalan" + fromIso "cy" = "welsh" + fromIso "cs" = "czech" + fromIso "cop" = "coptic" + fromIso "da" = "danish" + fromIso "dv" = "divehi" + fromIso "el" = "greek" + fromIso "en" = "english" + fromIso "eo" = "esperanto" + fromIso "es" = "spanish" + fromIso "et" = "estonian" + fromIso "eu" = "basque" + fromIso "fa" = "farsi" + fromIso "fi" = "finnish" + fromIso "fr" = "french" + fromIso "fur" = "friulan" + fromIso "ga" = "irish" + fromIso "gd" = "scottish" + fromIso "gez" = "ethiopic" + fromIso "gl" = "galician" + fromIso "he" = "hebrew" + fromIso "hi" = "hindi" + fromIso "hr" = "croatian" + fromIso "hu" = "magyar" + fromIso "hy" = "armenian" + fromIso "ia" = "interlingua" + fromIso "id" = "indonesian" + fromIso "ie" = "interlingua" + fromIso "is" = "icelandic" + fromIso "it" = "italian" + fromIso "ja" = "japanese" + fromIso "km" = "khmer" + fromIso "kmr" = "kurmanji" + fromIso "kn" = "kannada" + fromIso "ko" = "korean" + fromIso "la" = "latin" + fromIso "lo" = "lao" + fromIso "lt" = "lithuanian" + fromIso "lv" = "latvian" + fromIso "ml" = "malayalam" + fromIso "mn" = "mongolian" + fromIso "mr" = "marathi" + fromIso "nb" = "norsk" + fromIso "nl" = "dutch" + fromIso "nn" = "nynorsk" + fromIso "no" = "norsk" + fromIso "nqo" = "nko" + fromIso "oc" = "occitan" + fromIso "pa" = "panjabi" + fromIso "pl" = "polish" + fromIso "pms" = "piedmontese" + fromIso "pt" = "portuguese" + fromIso "rm" = "romansh" + fromIso "ro" = "romanian" + fromIso "ru" = "russian" + fromIso "sa" = "sanskrit" + fromIso "se" = "samin" + fromIso "sk" = "slovak" + fromIso "sq" = "albanian" + fromIso "sr" = "serbian" + fromIso "sv" = "swedish" + fromIso "syr" = "syriac" + fromIso "ta" = "tamil" + fromIso "te" = "telugu" + fromIso "th" = "thai" + fromIso "ti" = "ethiopic" + fromIso "tk" = "turkmen" + fromIso "tr" = "turkish" + fromIso "uk" = "ukrainian" + fromIso "ur" = "urdu" + fromIso "vi" = "vietnamese" + fromIso _ = "" diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs new file mode 100644 index 000000000..f225ef0c5 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs @@ -0,0 +1,34 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Notes + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output tables as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Notes + ( notesToLaTeX + ) where + +import Data.List (intersperse) +import Text.DocLayout ( Doc, braces, empty, text, vcat, ($$)) +import Data.Text (Text) + +notesToLaTeX :: [Doc Text] -> Doc Text +notesToLaTeX = \case + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs new file mode 100644 index 000000000..27a8a0257 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -0,0 +1,307 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Table + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output LaTeX formatted tables. +-} +module Text.Pandoc.Writers.LaTeX.Table + ( tableToLaTeX + ) where +import Control.Monad.State.Strict +import Data.List (intersperse) +import qualified Data.List.NonEmpty as NonEmpty +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout + ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest + , text, vcat, ($$) ) +import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow) +import Text.Pandoc.Walk (walk, query) +import Data.Monoid (Any(..)) +import Text.Pandoc.Writers.LaTeX.Caption (getCaption) +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow + , stNotes, stTable) ) +import Text.Printf (printf) +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.Writers.AnnotatedTable as Ann + +tableToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> ([Block] -> LW m (Doc Text)) + -> Ann.Table + -> LW m (Doc Text) +tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do + let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl + CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x + firsthead <- if isEmpty capt || isEmptyHead thead + then return empty + else ($$ text "\\endfirsthead") <$> + headToLaTeX blksToLaTeX thead + head' <- if isEmptyHead thead + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else headToLaTeX blksToLaTeX + (if isEmpty firsthead + then thead + else walk removeNote thead) + rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $ + mconcat (map bodyRows tbodies) <> footRows tfoot + modify $ \s -> s{ stTable = True } + notes <- notesToLaTeX <$> gets stNotes + return + $ "\\begin{longtable}[]" <> + braces ("@{}" <> colDescriptors tbl <> "@{}") + -- the @{} removes extra space at beginning and end + $$ capt + $$ firsthead + $$ head' + $$ "\\endhead" + $$ vcat rows' + $$ "\\bottomrule" + $$ "\\end{longtable}" + $$ captNotes + $$ notes + +-- | Creates column descriptors for the table. +colDescriptors :: Ann.Table -> Doc Text +colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) = + let (aligns, widths) = unzip specs + + defaultWidthsOnly = all (== ColWidthDefault) widths + isSimpleTable = all (all isSimpleCell) $ mconcat + [ headRows thead + , concatMap bodyRows tbodies + , footRows tfoot + ] + + relativeWidths = if defaultWidthsOnly + then replicate (length specs) + (1 / fromIntegral (length specs)) + else map toRelWidth widths + in if defaultWidthsOnly && isSimpleTable + then hcat $ map (literal . colAlign) aligns + else (cr <>) . nest 2 . vcat . map literal $ + zipWith (toColDescriptor (length specs)) + aligns + relativeWidths + where + toColDescriptor :: Int -> Alignment -> Double -> Text + toColDescriptor numcols align width = + T.pack $ printf + ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" + (T.unpack (alignCommand align)) + ((numcols - 1) * 2) + width + + isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) = + case blocks of + [Para _] -> True + [Plain _] -> True + [] -> True + _ -> False + + toRelWidth ColWidthDefault = 0 + toRelWidth (ColWidth w) = w + +alignCommand :: Alignment -> Text +alignCommand = \case + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + +colAlign :: Alignment -> Text +colAlign = \case + AlignLeft -> "l" + AlignRight -> "r" + AlignCenter -> "c" + AlignDefault -> "l" + +data CaptionDocs = + CaptionDocs + { captionCommand :: Doc Text + , captionNotes :: Doc Text + } + +captionToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Caption + -> LW m CaptionDocs +captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do + let caption = blocksToInlines longCaption + (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption + return $ CaptionDocs + { captionNotes = captNotes + , captionCommand = if isEmpty captionText + then empty + else "\\caption" <> captForLof <> + braces captionText <> "\\tabularnewline" + } + +type BlocksWriter m = [Block] -> LW m (Doc Text) + +headToLaTeX :: PandocMonad m + => BlocksWriter m + -> Ann.TableHead + -> LW m (Doc Text) +headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do + rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells) + headerRows + return ("\\toprule" $$ vcat rowsContents $$ "\\midrule") + +-- | Converts a row of table cells into a LaTeX row. +rowToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> [Ann.Cell] + -> LW m (Doc Text) +rowToLaTeX blocksWriter celltype row = do + cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row) + return $ hsep (intersperse "&" cellsDocs) <> " \\\\" + +-- | Pads row with empty cells to adjust for rowspans above this row. +fillRow :: [Ann.Cell] -> [Ann.Cell] +fillRow = go 0 + where + go _ [] = [] + go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) = + let (Cell _ _ _ (ColSpan colspan) _) = cell + in map mkEmptyCell [n .. colnum - 1] ++ + acell : go (colnum + colspan) cells + + mkEmptyCell :: Int -> Ann.Cell + mkEmptyCell colnum = + Ann.Cell ((AlignDefault, ColWidthDefault):|[]) + (Ann.ColNumber colnum) + B.emptyCell + +isEmptyHead :: Ann.TableHead -> Bool +isEmptyHead (Ann.TableHead _attr []) = True +isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows + +-- | Gets all cells in a header row. +headerRowCells :: Ann.HeaderRow -> [Ann.Cell] +headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells + +-- | Gets all cells in a body row. +bodyRowCells :: Ann.BodyRow -> [Ann.Cell] +bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells + +-- | Gets a list of rows of the table body, where a row is a simple +-- list of cells. +bodyRows :: Ann.TableBody -> [[Ann.Cell]] +bodyRows (Ann.TableBody _attr _rowheads headerRows rows) = + map headerRowCells headerRows <> map bodyRowCells rows + +-- | Gets a list of rows of the table head, where a row is a simple +-- list of cells. +headRows :: Ann.TableHead -> [[Ann.Cell]] +headRows (Ann.TableHead _attr rows) = map headerRowCells rows + +-- | Gets a list of rows from the foot, where a row is a simple list +-- of cells. +footRows :: Ann.TableFoot -> [[Ann.Cell]] +footRows (Ann.TableFoot _attr rows) = map headerRowCells rows + +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks = walk fixLineBreaks' + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks <> + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> + [RawInline "tex" "}"] + +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + +cellToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> Ann.Cell + -> LW m (Doc Text) +cellToLaTeX blockListToLaTeX celltype annotatedCell = do + let (Ann.Cell specs _colnum cell) = annotatedCell + let hasWidths = snd (NonEmpty.head specs) /= ColWidthDefault + let specAlign = fst (NonEmpty.head specs) + let (Cell _attr align' rowspan colspan blocks) = cell + let align = case align' of + AlignDefault -> specAlign + _ -> align' + beamer <- gets stBeamer + externalNotes <- gets stExternalNotes + inMinipage <- gets stInMinipage + -- See #5367 -- footnotehyper/footnote don't work in beamer, + -- so we need to produce the notes outside the table... + modify $ \st -> st{ stExternalNotes = beamer } + let isPlainOrPara = \case + Para{} -> True + Plain{} -> True + _ -> False + let hasLineBreak LineBreak = Any True + hasLineBreak _ = Any False + let hasLineBreaks = getAny $ query hasLineBreak blocks + result <- + if not hasWidths || (celltype /= HeaderCell + && all isPlainOrPara blocks + && not hasLineBreaks) + then + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks + else do + modify $ \st -> st{ stInMinipage = True } + cellContents <- blockListToLaTeX blocks + modify $ \st -> st{ stInMinipage = inMinipage } + let valign = text $ case celltype of + HeaderCell -> "[b]" + BodyCell -> "[t]" + let halign = literal $ alignCommand align + return $ "\\begin{minipage}" <> valign <> + braces "\\linewidth" <> halign <> cr <> + cellContents <> + (if hasLineBreaks then "\\strut" else mempty) + <> cr <> + "\\end{minipage}" + modify $ \st -> st{ stExternalNotes = externalNotes } + when (rowspan /= RowSpan 1) $ + modify (\st -> st{ stMultiRow = True }) + let inMultiColumn x = case colspan of + (ColSpan 1) -> x + (ColSpan n) -> "\\multicolumn" + <> braces (literal (tshow n)) + <> braces (literal $ colAlign align) + <> braces x + let inMultiRow x = case rowspan of + (RowSpan 1) -> x + (RowSpan n) -> let nrows = literal (tshow n) + in "\\multirow" <> braces nrows + <> braces "*" <> braces x + return . inMultiColumn . inMultiRow $ result + +data CellType + = HeaderCell + | BodyCell + deriving Eq diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs new file mode 100644 index 000000000..c06b7e923 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -0,0 +1,83 @@ +module Text.Pandoc.Writers.LaTeX.Types + ( LW + , WriterState (..) + , startingState + ) where + +import Control.Monad.State.Strict (StateT) +import Data.Text (Text) +import Text.DocLayout (Doc) +import Text.Pandoc.Options + ( WriterOptions (writerIncremental, writerTopLevelDivision) + , TopLevelDivision (..) + ) + +-- | LaTeX writer type. The type constructor @m@ will typically be an +-- instance of PandocMonad. +type LW m = StateT WriterState m + +data WriterState = + WriterState + { stInNote :: Bool -- ^ true if we're in a note + , stInQuote :: Bool -- ^ true if in a blockquote + , stExternalNotes :: Bool -- ^ true if in context where + -- we need to store footnotes + , stInMinipage :: Bool -- ^ true if in minipage + , stInHeading :: Bool -- ^ true if in a section heading + , stInItem :: Bool -- ^ true if in \item[..] + , stNotes :: [Doc Text] -- ^ notes in a minipage + , stOLLevel :: Int -- ^ level of ordered list nesting + , stOptions :: WriterOptions -- ^ writer options, so they don't have to + -- be parameter + , stVerbInNote :: Bool -- ^ true if document has verbatim text in note + , stTable :: Bool -- ^ true if document has a table + , stMultiRow :: Bool -- ^ true if document has multirow cells + , stStrikeout :: Bool -- ^ true if document has strikeout + , stUrl :: Bool -- ^ true if document has visible URL link + , stGraphics :: Bool -- ^ true if document contains images + , stLHS :: Bool -- ^ true if document has literate haskell code + , stHasChapters :: Bool -- ^ true if document has chapters + , stCsquotes :: Bool -- ^ true if document uses csquotes + , stHighlighting :: Bool -- ^ true if document has highlighted code + , stIncremental :: Bool -- ^ true if beamer lists should be + , stZwnj :: Bool -- ^ true if document has a ZWNJ character + , stInternalLinks :: [Text] -- ^ list of internal link targets + , stBeamer :: Bool -- ^ produce beamer + , stEmptyLine :: Bool -- ^ true if no content on line + , stHasCslRefs :: Bool -- ^ has a Div with class refs + , stIsFirstInDefinition :: Bool -- ^ first block in a defn list + } + +startingState :: WriterOptions -> WriterState +startingState options = + WriterState + { stInNote = False + , stInQuote = False + , stExternalNotes = False + , stInHeading = False + , stInMinipage = False + , stInItem = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stMultiRow = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stHasChapters = case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stZwnj = False + , stInternalLinks = [] + , stBeamer = False + , stEmptyLine = True + , stHasCslRefs = False + , stIsFirstInDefinition = False + } diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs new file mode 100644 index 000000000..c34338121 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -0,0 +1,275 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Util + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Util ( + stringToLaTeX + , StringContext(..) + , toLabel + , inCmd + , wrapDiv + , hypertarget + , labelFor + , getListingsLanguage + , mbBraced + ) +where + +import Control.Applicative ((<|>)) +import Control.Monad (when) +import Text.Pandoc.Class (PandocMonad, toLang) +import Text.Pandoc.Options (WriterOptions(..), isEnabled) +import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv) +import Text.Pandoc.Highlighting (toListingsLanguage) +import Text.DocLayout +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize (showFl) +import Control.Monad.State.Strict (gets, modify) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Extensions (Extension(Ext_smart)) +import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum) +import Text.Printf (printf) +import Text.Pandoc.Shared (safeRead, elemText) +import qualified Data.Text.Normalize as Normalize +import Data.List (uncons) + +data StringContext = TextString + | URLString + | CodeString + deriving (Eq) + +-- escape things as needed for LaTeX +stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text +stringToLaTeX context zs = do + opts <- gets stOptions + when ('\x200c' `elemText` zs) $ + modify (\s -> s { stZwnj = True }) + return $ T.pack $ + foldr (go opts context) mempty $ T.unpack $ + if writerPreferAscii opts + then Normalize.normalize Normalize.NFD zs + else zs + where + go :: WriterOptions -> StringContext -> Char -> String -> String + go opts ctx x xs = + let ligatures = isEnabled Ext_smart opts && ctx == TextString + isUrl = ctx == URLString + mbAccentCmd = + if writerPreferAscii opts && ctx == TextString + then uncons xs >>= \(c,_) -> lookupAccent c + else Nothing + emits s = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent + Nothing -> s <> xs + emitc c = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent + Nothing -> c : xs + emitcseq cs = + case xs of + c:_ | isLetter c + , ctx == TextString + -> cs <> " " <> xs + | isSpace c -> cs <> "{}" <> xs + | ctx == TextString + -> cs <> xs + _ -> cs <> "{}" <> xs + emitquote cs = + case xs of + '`':_ -> cs <> "\\," <> xs -- add thin space + '\'':_ -> cs <> "\\," <> xs -- add thin space + _ -> cs <> xs + in case x of + '?' | ligatures -> -- avoid ?` ligature + case xs of + '`':_ -> emits "?{}" + _ -> emitc x + '!' | ligatures -> -- avoid !` ligature + case xs of + '`':_ -> emits "!{}" + _ -> emitc x + '{' -> emits "\\{" + '}' -> emits "\\}" + '`' | ctx == CodeString -> emitcseq "\\textasciigrave" + '$' | not isUrl -> emits "\\$" + '%' -> emits "\\%" + '&' -> emits "\\&" + '_' | not isUrl -> emits "\\_" + '#' -> emits "\\#" + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> emits "-\\/" + _ -> emitc '-' + '~' | not isUrl -> emitcseq "\\textasciitilde" + '^' -> emits "\\^{}" + '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows + | otherwise -> emitcseq "\\textbackslash" + '|' | not isUrl -> emitcseq "\\textbar" + '<' -> emitcseq "\\textless" + '>' -> emitcseq "\\textgreater" + '[' -> emits "{[}" -- to avoid interpretation as + ']' -> emits "{]}" -- optional arguments + '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" + '\160' -> emits "~" + '\x200B' -> emits "\\hspace{0pt}" -- zero-width space + '\x202F' -> emits "\\," + '\x2026' -> emitcseq "\\ldots" + '\x2018' | ligatures -> emitquote "`" + '\x2019' | ligatures -> emitquote "'" + '\x201C' | ligatures -> emitquote "``" + '\x201D' | ligatures -> emitquote "''" + '\x2014' | ligatures -> emits "---" + '\x2013' | ligatures -> emits "--" + _ | writerPreferAscii opts + -> case x of + 'ı' -> emitcseq "\\i" + 'ȷ' -> emitcseq "\\j" + 'å' -> emitcseq "\\aa" + 'Å' -> emitcseq "\\AA" + 'ß' -> emitcseq "\\ss" + 'ø' -> emitcseq "\\o" + 'Ø' -> emitcseq "\\O" + 'Ł' -> emitcseq "\\L" + 'ł' -> emitcseq "\\l" + 'æ' -> emitcseq "\\ae" + 'Æ' -> emitcseq "\\AE" + 'œ' -> emitcseq "\\oe" + 'Œ' -> emitcseq "\\OE" + '£' -> emitcseq "\\pounds" + '€' -> emitcseq "\\euro" + '©' -> emitcseq "\\copyright" + _ -> emitc x + | otherwise -> emitc x + +lookupAccent :: Char -> Maybe String +lookupAccent '\779' = Just "\\H" +lookupAccent '\768' = Just "\\`" +lookupAccent '\769' = Just "\\'" +lookupAccent '\770' = Just "\\^" +lookupAccent '\771' = Just "\\~" +lookupAccent '\776' = Just "\\\"" +lookupAccent '\775' = Just "\\." +lookupAccent '\772' = Just "\\=" +lookupAccent '\781' = Just "\\|" +lookupAccent '\817' = Just "\\b" +lookupAccent '\807' = Just "\\c" +lookupAccent '\783' = Just "\\G" +lookupAccent '\777' = Just "\\h" +lookupAccent '\803' = Just "\\d" +lookupAccent '\785' = Just "\\f" +lookupAccent '\778' = Just "\\r" +lookupAccent '\865' = Just "\\t" +lookupAccent '\782' = Just "\\U" +lookupAccent '\780' = Just "\\v" +lookupAccent '\774' = Just "\\u" +lookupAccent '\808' = Just "\\k" +lookupAccent '\8413' = Just "\\textcircled" +lookupAccent _ = Nothing + +toLabel :: PandocMonad m => Text -> LW m Text +toLabel z = go `fmap` stringToLaTeX URLString z + where + go = T.concatMap $ \x -> case x of + _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x + | x `elemText` "_-+=:;." -> T.singleton x + | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) + +-- | Puts contents into LaTeX command. +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '\\' <> literal cmd <> braces contents + +mapAlignment :: Text -> Text +mapAlignment a = case a of + "top" -> "T" + "top-baseline" -> "t" + "bottom" -> "b" + "center" -> "c" + _ -> a + +wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) +wrapDiv (_,classes,kvs) t = do + beamer <- gets stBeamer + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs + let wrapColumns = if beamer && "columns" `elem` classes + then \contents -> + let valign = maybe "T" mapAlignment (lookup "align" kvs) + totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) + (lookup "totalwidth" kvs) + onlytextwidth = filter ("onlytextwidth" ==) classes + options = text $ T.unpack $ T.intercalate "," $ + valign : totalwidth ++ onlytextwidth + in inCmd "begin" "columns" <> brackets options + $$ contents + $$ inCmd "end" "columns" + else id + wrapColumn = if beamer && "column" `elem` classes + then \contents -> + let valign = + maybe "" + (brackets . text . T.unpack . mapAlignment) + (lookup "align" kvs) + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> + valign <> + braces (literal w <> "\\textwidth") + $$ contents + $$ inCmd "end" "column" + else id + fromPct xs = + case T.unsnoc xs of + Just (ds, '%') -> case safeRead ds of + Just digits -> showFl (digits / 100 :: Double) + Nothing -> xs + _ -> xs + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lang of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if T.null o + then "" + else brackets $ literal o + in inCmd "begin" (literal l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (literal l) + Nothing -> txt + return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t + +hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) +hypertarget _ "" x = return x +hypertarget addnewline ident x = do + ref <- literal `fmap` toLabel ident + return $ text "\\hypertarget" + <> braces ref + <> braces ((if addnewline && not (isEmpty x) + then "%" <> cr + else empty) <> x) + +labelFor :: PandocMonad m => Text -> LW m (Doc Text) +labelFor "" = return empty +labelFor ident = do + ref <- literal `fmap` toLabel ident + return $ text "\\label" <> braces ref + +-- Determine listings language from list of class attributes. +getListingsLanguage :: [Text] -> Maybe Text +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) + then "{" <> x <> "}" + else x |
