aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/LaTeX/Lang.hs
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:10:34 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:46:16 +0200
commit48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Readers/LaTeX/Lang.hs
parent0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent46099e79defe662e541b12548200caf29063c1c6 (diff)
downloadpandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX/Lang.hs')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs321
1 files changed, 200 insertions, 121 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Lang
- Copyright : Copyright (C) 2018-2020 John MacFarlane
+ Copyright : Copyright (C) 2018-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to
BCP47 'Lang'.
-}
module Text.Pandoc.Readers.LaTeX.Lang
- ( polyglossiaLangToBCP47
+ ( setDefaultLanguage
+ , polyglossiaLangToBCP47
, babelLangToBCP47
+ , enquoteCommands
+ , inlineLanguageCommands
)
where
import qualified Data.Map as M
+import Data.Text (Text)
import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.Shared (extractSpaces)
+import Text.Collate.Lang (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+ withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+ singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+ => LP m Inlines
+ -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+ skipopts
+ let lang = mblang >>= babelLangToBCP47
+ let langspan = case lang of
+ Nothing -> id
+ Just l -> spanWith ("",[],[("lang", renderLang l)])
+ quoteContext <- sQuoteContext <$> getState
+ if starred || quoteContext == InDoubleQuote
+ then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+ [ ("enquote*", enquote tok True Nothing)
+ , ("enquote", enquote tok False Nothing)
+ -- foreignquote is supposed to use native quote marks
+ , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+ -- hypehnquote uses regular quotes
+ , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+ ]
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+ babelLang <- untokenize <$> braced
+ case babelLangToBCP47 babelLang of
+ Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
+ _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+ M.fromList $
+ ("foreignlanguage", foreignlanguage tok) :
+ (mk <$> M.toList polyglossiaLangToBCP47)
+ where
+ mk (polyglossia, bcp47Func) =
+ ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+ => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ let lang = renderLang $ bcp47Func o
+ extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ polylang <- untokenize <$> braced
+ case M.lookup polylang polyglossiaLangToBCP47 of
+ Nothing -> return mempty -- TODO mzero? warning?
+ Just langFunc -> do
+ let l = langFunc o
+ setTranslations l
+ updateState $ setMeta "lang" $ str (renderLang l)
+ return mempty
polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
polyglossiaLangToBCP47 = M.fromList
[ ("arabic", \o -> case T.filter (/=' ') o of
- "locale=algeria" -> Lang "ar" "" "DZ" []
- "locale=mashriq" -> Lang "ar" "" "SY" []
- "locale=libya" -> Lang "ar" "" "LY" []
- "locale=morocco" -> Lang "ar" "" "MA" []
- "locale=mauritania" -> Lang "ar" "" "MR" []
- "locale=tunisia" -> Lang "ar" "" "TN" []
- _ -> Lang "ar" "" "" [])
+ "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] []
+ "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] []
+ "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] []
+ "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] []
+ "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+ "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] []
+ _ -> Lang "ar" Nothing (Just "") [] [] [])
, ("german", \o -> case T.filter (/=' ') o of
- "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
"variant=austrian,spelling=old"
- -> Lang "de" "" "AT" ["1901"]
- "variant=austrian" -> Lang "de" "" "AT" []
+ -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
"variant=swiss,spelling=old"
- -> Lang "de" "" "CH" ["1901"]
- "variant=swiss" -> Lang "de" "" "CH" []
- _ -> Lang "de" "" "" [])
- , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+ _ -> Lang "de" Nothing Nothing [] [] [])
+ , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
, ("greek", \o -> case T.filter (/=' ') o of
- "variant=poly" -> Lang "el" "" "polyton" []
- "variant=ancient" -> Lang "grc" "" "" []
- _ -> Lang "el" "" "" [])
+ "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] []
+ "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+ _ -> Lang "el" Nothing Nothing [] [] [])
, ("english", \o -> case T.filter (/=' ') o of
- "variant=australian" -> Lang "en" "" "AU" []
- "variant=canadian" -> Lang "en" "" "CA" []
- "variant=british" -> Lang "en" "" "GB" []
- "variant=newzealand" -> Lang "en" "" "NZ" []
- "variant=american" -> Lang "en" "" "US" []
- _ -> Lang "en" "" "" [])
- , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+ "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] []
+ "variant=british" -> Lang "en" Nothing (Just "GB") [] [] []
+ "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+ "variant=american" -> Lang "en" Nothing (Just "US") [] [] []
+ _ -> Lang "en" Nothing (Just "") [] [] [])
+ , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
, ("latin", \o -> case T.filter (/=' ') o of
- "variant=classic" -> Lang "la" "" "" ["x-classic"]
- _ -> Lang "la" "" "" [])
- , ("slovenian", \_ -> Lang "sl" "" "" [])
- , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
- , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
- , ("afrikaans", \_ -> Lang "af" "" "" [])
- , ("amharic", \_ -> Lang "am" "" "" [])
- , ("assamese", \_ -> Lang "as" "" "" [])
- , ("asturian", \_ -> Lang "ast" "" "" [])
- , ("bulgarian", \_ -> Lang "bg" "" "" [])
- , ("bengali", \_ -> Lang "bn" "" "" [])
- , ("tibetan", \_ -> Lang "bo" "" "" [])
- , ("breton", \_ -> Lang "br" "" "" [])
- , ("catalan", \_ -> Lang "ca" "" "" [])
- , ("welsh", \_ -> Lang "cy" "" "" [])
- , ("czech", \_ -> Lang "cs" "" "" [])
- , ("coptic", \_ -> Lang "cop" "" "" [])
- , ("danish", \_ -> Lang "da" "" "" [])
- , ("divehi", \_ -> Lang "dv" "" "" [])
- , ("esperanto", \_ -> Lang "eo" "" "" [])
- , ("spanish", \_ -> Lang "es" "" "" [])
- , ("estonian", \_ -> Lang "et" "" "" [])
- , ("basque", \_ -> Lang "eu" "" "" [])
- , ("farsi", \_ -> Lang "fa" "" "" [])
- , ("finnish", \_ -> Lang "fi" "" "" [])
- , ("french", \_ -> Lang "fr" "" "" [])
- , ("friulan", \_ -> Lang "fur" "" "" [])
- , ("irish", \_ -> Lang "ga" "" "" [])
- , ("scottish", \_ -> Lang "gd" "" "" [])
- , ("ethiopic", \_ -> Lang "gez" "" "" [])
- , ("galician", \_ -> Lang "gl" "" "" [])
- , ("hebrew", \_ -> Lang "he" "" "" [])
- , ("hindi", \_ -> Lang "hi" "" "" [])
- , ("croatian", \_ -> Lang "hr" "" "" [])
- , ("magyar", \_ -> Lang "hu" "" "" [])
- , ("armenian", \_ -> Lang "hy" "" "" [])
- , ("interlingua", \_ -> Lang "ia" "" "" [])
- , ("indonesian", \_ -> Lang "id" "" "" [])
- , ("icelandic", \_ -> Lang "is" "" "" [])
- , ("italian", \_ -> Lang "it" "" "" [])
- , ("japanese", \_ -> Lang "jp" "" "" [])
- , ("khmer", \_ -> Lang "km" "" "" [])
- , ("kurmanji", \_ -> Lang "kmr" "" "" [])
- , ("kannada", \_ -> Lang "kn" "" "" [])
- , ("korean", \_ -> Lang "ko" "" "" [])
- , ("lao", \_ -> Lang "lo" "" "" [])
- , ("lithuanian", \_ -> Lang "lt" "" "" [])
- , ("latvian", \_ -> Lang "lv" "" "" [])
- , ("malayalam", \_ -> Lang "ml" "" "" [])
- , ("mongolian", \_ -> Lang "mn" "" "" [])
- , ("marathi", \_ -> Lang "mr" "" "" [])
- , ("dutch", \_ -> Lang "nl" "" "" [])
- , ("nynorsk", \_ -> Lang "nn" "" "" [])
- , ("norsk", \_ -> Lang "no" "" "" [])
- , ("nko", \_ -> Lang "nqo" "" "" [])
- , ("occitan", \_ -> Lang "oc" "" "" [])
- , ("panjabi", \_ -> Lang "pa" "" "" [])
- , ("polish", \_ -> Lang "pl" "" "" [])
- , ("piedmontese", \_ -> Lang "pms" "" "" [])
- , ("portuguese", \_ -> Lang "pt" "" "" [])
- , ("romansh", \_ -> Lang "rm" "" "" [])
- , ("romanian", \_ -> Lang "ro" "" "" [])
- , ("russian", \_ -> Lang "ru" "" "" [])
- , ("sanskrit", \_ -> Lang "sa" "" "" [])
- , ("samin", \_ -> Lang "se" "" "" [])
- , ("slovak", \_ -> Lang "sk" "" "" [])
- , ("albanian", \_ -> Lang "sq" "" "" [])
- , ("serbian", \_ -> Lang "sr" "" "" [])
- , ("swedish", \_ -> Lang "sv" "" "" [])
- , ("syriac", \_ -> Lang "syr" "" "" [])
- , ("tamil", \_ -> Lang "ta" "" "" [])
- , ("telugu", \_ -> Lang "te" "" "" [])
- , ("thai", \_ -> Lang "th" "" "" [])
- , ("turkmen", \_ -> Lang "tk" "" "" [])
- , ("turkish", \_ -> Lang "tr" "" "" [])
- , ("ukrainian", \_ -> Lang "uk" "" "" [])
- , ("urdu", \_ -> Lang "ur" "" "" [])
- , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+ _ -> Lang "la" Nothing Nothing [] [] [])
+ , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+ , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+ , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+ , ("afrikaans", \_ -> simpleLang "af")
+ , ("amharic", \_ -> simpleLang "am")
+ , ("assamese", \_ -> simpleLang "as")
+ , ("asturian", \_ -> simpleLang "ast")
+ , ("bulgarian", \_ -> simpleLang "bg")
+ , ("bengali", \_ -> simpleLang "bn")
+ , ("tibetan", \_ -> simpleLang "bo")
+ , ("breton", \_ -> simpleLang "br")
+ , ("catalan", \_ -> simpleLang "ca")
+ , ("welsh", \_ -> simpleLang "cy")
+ , ("czech", \_ -> simpleLang "cs")
+ , ("coptic", \_ -> simpleLang "cop")
+ , ("danish", \_ -> simpleLang "da")
+ , ("divehi", \_ -> simpleLang "dv")
+ , ("esperanto", \_ -> simpleLang "eo")
+ , ("spanish", \_ -> simpleLang "es")
+ , ("estonian", \_ -> simpleLang "et")
+ , ("basque", \_ -> simpleLang "eu")
+ , ("farsi", \_ -> simpleLang "fa")
+ , ("finnish", \_ -> simpleLang "fi")
+ , ("french", \_ -> simpleLang "fr")
+ , ("friulan", \_ -> simpleLang "fur")
+ , ("irish", \_ -> simpleLang "ga")
+ , ("scottish", \_ -> simpleLang "gd")
+ , ("ethiopic", \_ -> simpleLang "gez")
+ , ("galician", \_ -> simpleLang "gl")
+ , ("hebrew", \_ -> simpleLang "he")
+ , ("hindi", \_ -> simpleLang "hi")
+ , ("croatian", \_ -> simpleLang "hr")
+ , ("magyar", \_ -> simpleLang "hu")
+ , ("armenian", \_ -> simpleLang "hy")
+ , ("interlingua", \_ -> simpleLang "ia")
+ , ("indonesian", \_ -> simpleLang "id")
+ , ("icelandic", \_ -> simpleLang "is")
+ , ("italian", \_ -> simpleLang "it")
+ , ("japanese", \_ -> simpleLang "jp")
+ , ("khmer", \_ -> simpleLang "km")
+ , ("kurmanji", \_ -> simpleLang "kmr")
+ , ("kannada", \_ -> simpleLang "kn")
+ , ("korean", \_ -> simpleLang "ko")
+ , ("lao", \_ -> simpleLang "lo")
+ , ("lithuanian", \_ -> simpleLang "lt")
+ , ("latvian", \_ -> simpleLang "lv")
+ , ("malayalam", \_ -> simpleLang "ml")
+ , ("mongolian", \_ -> simpleLang "mn")
+ , ("marathi", \_ -> simpleLang "mr")
+ , ("dutch", \_ -> simpleLang "nl")
+ , ("nynorsk", \_ -> simpleLang "nn")
+ , ("norsk", \_ -> simpleLang "no")
+ , ("nko", \_ -> simpleLang "nqo")
+ , ("occitan", \_ -> simpleLang "oc")
+ , ("panjabi", \_ -> simpleLang "pa")
+ , ("polish", \_ -> simpleLang "pl")
+ , ("piedmontese", \_ -> simpleLang "pms")
+ , ("portuguese", \_ -> simpleLang "pt")
+ , ("romansh", \_ -> simpleLang "rm")
+ , ("romanian", \_ -> simpleLang "ro")
+ , ("russian", \_ -> simpleLang "ru")
+ , ("sanskrit", \_ -> simpleLang "sa")
+ , ("samin", \_ -> simpleLang "se")
+ , ("slovak", \_ -> simpleLang "sk")
+ , ("albanian", \_ -> simpleLang "sq")
+ , ("serbian", \_ -> simpleLang "sr")
+ , ("swedish", \_ -> simpleLang "sv")
+ , ("syriac", \_ -> simpleLang "syr")
+ , ("tamil", \_ -> simpleLang "ta")
+ , ("telugu", \_ -> simpleLang "te")
+ , ("thai", \_ -> simpleLang "th")
+ , ("turkmen", \_ -> simpleLang "tk")
+ , ("turkish", \_ -> simpleLang "tr")
+ , ("ukrainian", \_ -> simpleLang "uk")
+ , ("urdu", \_ -> simpleLang "ur")
+ , ("vietnamese", \_ -> simpleLang "vi")
]
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
babelLangToBCP47 :: T.Text -> Maybe Lang
babelLangToBCP47 s =
case s of
- "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
- "naustrian" -> Just $ Lang "de" "" "AT" []
- "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
- "nswissgerman" -> Just $ Lang "de" "" "CH" []
- "german" -> Just $ Lang "de" "" "DE" ["1901"]
- "ngerman" -> Just $ Lang "de" "" "DE" []
- "lowersorbian" -> Just $ Lang "dsb" "" "" []
- "uppersorbian" -> Just $ Lang "hsb" "" "" []
- "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
- "slovene" -> Just $ Lang "sl" "" "" []
- "australian" -> Just $ Lang "en" "" "AU" []
- "canadian" -> Just $ Lang "en" "" "CA" []
- "british" -> Just $ Lang "en" "" "GB" []
- "newzealand" -> Just $ Lang "en" "" "NZ" []
- "american" -> Just $ Lang "en" "" "US" []
- "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+ "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+ "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+ "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+ "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+ "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+ "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+ "slovene" -> Just $ simpleLang "sl"
+ "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+ "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+ "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+ "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+ "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+ "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
_ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47