diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/BCP47.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Citeproc/Data.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Lang.hs | 117 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Shared.hs | 14 |
5 files changed, 76 insertions, 74 deletions
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 69824aa57..1ecf0bf73 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -37,19 +37,6 @@ renderLang :: Lang -> T.Text renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) ([langScript lang, langRegion lang] ++ langVariants lang)) --- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe T.Text -getLang opts meta = - case lookupContext "lang" (writerVariables opts) of - Just s -> Just s - _ -> - case lookupMeta "lang" meta of - Just (MetaBlocks [Para [Str s]]) -> Just s - Just (MetaBlocks [Plain [Str s]]) -> Just s - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing - -- | Parse a BCP 47 string as a Lang. Currently we parse -- extensions and private-use fields as "variants," even -- though officially they aren't. diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index dfdaf2598..40430b0f5 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.Citeproc.Util (toIETF) -import Citeproc (Lang(..), parseLang) +import UnicodeCollation.Lang (Lang(..), parseLang) biblatexLocalizations :: [(FilePath, ByteString)] biblatexLocalizations = $(embedDir "citeproc/biblatex-localization") @@ -21,7 +21,8 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) + let Lang lang _ _ _ _ _ = parseLang + (toIETF $ T.takeWhile (/= '.') $ T.pack fp) ls = T.lines $ TE.decodeUtf8 bs in if length ls > 4 then M.insert lang (toStringMap $ map (T.splitOn "|") ls) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 95cbdc8b8..e389c1727 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -852,7 +852,6 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) - -- -- IANA URIs -- diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 41aafee48..871b2692a 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang toBabel ) where import Data.Text (Text) -import Text.Pandoc.BCP47 (Lang (..)) +import UnicodeCollation.Lang (Lang(..)) -- In environments \Arabic instead of \arabic is used @@ -25,88 +25,89 @@ toPolyglossiaEnv l = ("arabic", o) -> ("Arabic", o) x -> x --- Takes a list of the constituents of a BCP 47 language code and +-- Takes a list of the constituents of a BCP47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf toPolyglossia :: Lang -> (Text, Text) -toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") -toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") -toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") -toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") -toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") -toPolyglossia (Lang "de" _ _ vars) - | "1901" `elem` vars = ("german", "spelling=old") -toPolyglossia (Lang "de" _ "AT" vars) - | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") -toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") -toPolyglossia (Lang "de" _ "CH" vars) - | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") -toPolyglossia (Lang "de" _ _ _) = ("german", "") -toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") -toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") -toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") -toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") -toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") -toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") -toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") -toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") -toPolyglossia (Lang "la" _ _ vars) - | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _) = ("portuguese", "variant=brazilian") -toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "JO") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LB") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LY") _ _ _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ (Just "MA") _ _ _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ (Just "MR") _ _ _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ (Just "PS") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "SY") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "TN") _ _ _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "pt" _ "BR" _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") toPolyglossia x = (commonFromBcp47 x, "") --- Takes a list of the constituents of a BCP 47 language code and +-- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf toBabel :: Lang -> Text -toBabel (Lang "de" _ "AT" vars) +toBabel (Lang "de" _ (Just "AT") vars _ _) | "1901" `elem` vars = "austrian" | otherwise = "naustrian" -toBabel (Lang "de" _ "CH" vars) +toBabel (Lang "de" _ (Just "CH") vars _ _) | "1901" `elem` vars = "swissgerman" | otherwise = "nswissgerman" -toBabel (Lang "de" _ _ vars) +toBabel (Lang "de" _ _ vars _ _) | "1901" `elem` vars = "german" | otherwise = "ngerman" -toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" toBabel (Lang "el" _ _ vars) | "polyton" `elem` vars = "polutonikogreek" -toBabel (Lang "en" _ "AU" _) = "australian" -toBabel (Lang "en" _ "CA" _) = "canadian" -toBabel (Lang "en" _ "GB" _) = "british" -toBabel (Lang "en" _ "NZ" _) = "newzealand" -toBabel (Lang "en" _ "UK" _) = "british" -toBabel (Lang "en" _ "US" _) = "american" -toBabel (Lang "fr" _ "CA" _) = "canadien" -toBabel (Lang "fra" _ _ vars) +toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" +toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" +toBabel (Lang "en" _ (Just "GB") _ _ _) = "british" +toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand" +toBabel (Lang "en" _ (Just "UK") _ _ _) = "british" +toBabel (Lang "en" _ (Just "US") _ _ _) = "american" +toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" +toBabel (Lang "fra" _ _ vars _ _) | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _) = "polutonikogreek" -toBabel (Lang "hsb" _ _ _) = "uppersorbian" -toBabel (Lang "la" _ _ vars) +toBabel (Lang "grc" _ _ _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = "classiclatin" -toBabel (Lang "pt" _ "BR" _) = "brazilian" -toBabel (Lang "sl" _ _ _) = "slovene" +toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian" +toBabel (Lang "sl" _ _ _ _ _) = "slovene" toBabel x = commonFromBcp47 x --- Takes a list of the constituents of a BCP 47 language code +-- Takes a list of the constituents of a BCP47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 commonFromBcp47 :: Lang -> Text -commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" -commonFromBcp47 (Lang "zh" "Latn" _ vars) - | "pinyin" `elem` vars = "pinyin" -commonFromBcp47 (Lang l _ _ _) = fromIso l +commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = "serbianc" +commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 91ecb310b..fcb47bd5a 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared ( , setField , resetField , defField + , getLang , tagWithAttrs , isDisplayMath , fixDisplayMath @@ -147,6 +148,19 @@ defField field val (Context m) = where f _newval oldval = oldval +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe Text +getLang opts meta = + case lookupContext "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaBlocks [Para [Str s]]) -> Just s + Just (MetaBlocks [Plain [Str s]]) -> Just s + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + -- | Produce an HTML tag with the given pandoc attributes. tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep |