diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-04-11 21:28:48 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-04-17 16:15:14 -0700 |
commit | aecbf8156eb7c36c4b41de27797e262c23728db5 (patch) | |
tree | 2c9fe9de41a0f7037485dacee444b36cc2ccc110 /src/Text | |
parent | 7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 (diff) | |
download | pandoc-aecbf8156eb7c36c4b41de27797e262c23728db5.tar.gz |
Remove Text.Pandoc.BCP47 module.
[API change]
Use Lang from UnicodeCollation.Lang instead.
This is a richer implementation of BCP 47.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/App.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/BCP47.hs | 99 | ||||
-rw-r--r-- | src/Text/Pandoc/Citeproc.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Citeproc/Data.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Class/CommonState.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/BibTeX.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Lang.hs | 241 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/BibTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 48 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CslJson.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX/Lang.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OpenDocument.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Shared.hs | 2 |
19 files changed, 198 insertions, 293 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6b45e5418..67d3cce7d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) @@ -200,8 +200,8 @@ convertWithOpts opts = do Just f -> readFileStrict f case lookupMetaString "lang" (optMetadata opts) of - "" -> setTranslations $ Lang "en" "" "US" [] - l -> case parseBCP47 l of + "" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] [] + l -> case parseLang l of Left _ -> report $ InvalidLang l Right l' -> setTranslations l' diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs deleted file mode 100644 index 1ecf0bf73..000000000 --- a/src/Text/Pandoc/BCP47.hs +++ /dev/null @@ -1,99 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.BCP47 - Copyright : Copyright (C) 2017-2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for parsing and rendering BCP47 language identifiers. --} -module Text.Pandoc.BCP47 ( - getLang - , parseBCP47 - , Lang(..) - , renderLang - ) -where -import Control.Monad (guard) -import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.DocTemplates (FromContext(..)) -import qualified Data.Text as T -import qualified Text.Parsec as P - --- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: T.Text - , langScript :: T.Text - , langRegion :: T.Text - , langVariants :: [T.Text] } - deriving (Eq, Ord, Show) - --- | Render a Lang as BCP 47. -renderLang :: Lang -> T.Text -renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) - ([langScript lang, langRegion lang] ++ langVariants lang)) - --- | Parse a BCP 47 string as a Lang. Currently we parse --- extensions and private-use fields as "variants," even --- though officially they aren't. -parseBCP47 :: T.Text -> Either T.Text Lang -parseBCP47 lang = - case P.parse bcp47 "lang" lang of - Right r -> Right r - Left e -> Left $ T.pack $ show e - where bcp47 = do - language <- pLanguage - script <- P.option "" pScript - region <- P.option "" pRegion - variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) - P.eof - return Lang{ langLanguage = language - , langScript = script - , langRegion = region - , langVariants = variants } - asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) - pLanguage = do - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toLower $ T.pack cs - pScript = P.try $ do - P.char '-' - x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) - xs <- P.count 3 - (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return $ T.toLower $ T.pack (x:xs) - pRegion = P.try $ do - P.char '-' - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toUpper $ T.pack cs - pVariant = P.try $ do - P.char '-' - ds <- P.option "" (P.count 1 P.digit) - cs <- P.many1 asciiLetter - let var = ds ++ cs - lv = length var - guard $ if null ds - then lv >= 5 && lv <= 8 - else lv == 4 - return $ T.toLower $ T.pack var - pExtension = P.try $ do - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - let lcs = length cs - guard $ lcs >= 2 && lcs <= 8 - return $ T.toLower $ T.pack cs - pPrivateUse = P.try $ do - P.char '-' - P.char 'x' - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - guard $ not (null cs) && length cs <= 8 - let var = "x-" ++ cs - return $ T.toLower $ T.pack var diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index af302f782..c9f1806e4 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) -import qualified Text.Pandoc.BCP47 as BCP47 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc @@ -630,13 +629,8 @@ removeFinalPeriod ils = bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang) bcp47LangToIETF bcplang = - case BCP47.parseBCP47 bcplang of + case parseLang bcplang of Left _ -> do report $ InvalidLang bcplang return Nothing - Right lang -> - return $ Just - $ Lang (BCP47.langLanguage lang) - (if T.null (BCP47.langRegion lang) - then Nothing - else Just (BCP47.langRegion lang)) + Right lang -> return $ Just lang diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index c0752dadc..510e56f9c 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref = [ (", " <>) <$> nameGiven name, nameDroppingParticle name ] - mblang' = (parseLang <$> getVariableAsText "language") <|> mblang + mblang' = case getVariableAsText "language" of + Just l -> either (const Nothing) Just $ parseLang l + Nothing -> mblang titlecase = case mblang' of - Just (Lang "en" _) -> titlecase' + Just lang | langLanguage lang == "en" + -> titlecase' Nothing -> titlecase' _ -> case variant of @@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref = renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField defaultLang :: Lang -defaultLang = Lang "en" (Just "US") +defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text @@ -351,9 +354,7 @@ itemToReference locale variant item = do bib item $ do let lang = fromMaybe defaultLang $ localeLanguage locale modify $ \st -> st{ localeLang = lang, - untitlecase = case lang of - (Lang "en" _) -> True - _ -> False } + untitlecase = langLanguage lang == "en" } id' <- asks identifier otherIds <- (Just <$> getRawField "ids") @@ -711,7 +712,7 @@ itemToReference locale variant item = do bib :: Item -> Bib a -> BibParser a -bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US"))) +bib entry m = fst <$> evalRWST m entry (BibState True defaultLang) resolveCrossRefs :: Variant -> [Item] -> [Item] resolveCrossRefs variant entries = @@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils go x = x resolveKey' :: Lang -> Text -> Text -resolveKey' lang@(Lang l _) k = - case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of +resolveKey' lang k = + case Map.lookup (langLanguage lang) biblatexStringMap >>= + Map.lookup (T.toLower k) of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index 40430b0f5..388b9ba62 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ _ _ _ _ = parseLang - (toIETF $ T.takeWhile (/= '.') $ T.pack fp) - ls = T.lines $ TE.decodeUtf8 bs - in if length ls > 4 - then M.insert lang (toStringMap $ map (T.splitOn "|") ls) - else id + let ls = T.lines $ TE.decodeUtf8 bs + in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of + Right lang | length ls > 4 + -> M.insert (langLanguage lang) + (toStringMap $ map (T.splitOn "|") ls) + _ -> id toStringMap = foldr go' mempty go' [term, x, y] = M.insert term (x, y) go' _ = id diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 7e1735c2b..0fd094d99 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -19,7 +19,7 @@ where import Data.Default (Default (def)) import Data.Text (Text) -import Text.Pandoc.BCP47 (Lang) +import UnicodeCollation.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) import Text.Pandoc.Translations (Translations) diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 293a822a0..76f1fa32b 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, import System.FilePath ((</>), (<.>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) -import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import UnicodeCollation.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) toLang Nothing = return Nothing toLang (Just s) = - case parseBCP47 s of + case parseLang s of Left _ -> do report $ InvalidLang s return Nothing diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index 956b9f1f7..b82a81350 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc readBibTeX' variant _opts t = do - lang <- maybe (Lang "en" (Just "US")) parseLang - <$> lookupEnv "LANG" + mblangEnv <- lookupEnv "LANG" + let defaultLang = Lang "en" Nothing (Just "US") [] [] [] + let lang = case mblangEnv of + Nothing -> defaultLang + Just l -> either (const defaultLang) id $ parseLang l locale <- case getLocale lang of Left e -> - case getLocale (Lang "en" (Just "US")) of + case getLocale (Lang "en" Nothing (Just "US") [] [] []) of Right l -> return l Left _ -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 851756065..83caf742a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import Text.Pandoc.BCP47 (renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 08e217bdb..b92e6ab57 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -23,7 +23,7 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) -import Text.Pandoc.BCP47 (Lang(..), renderLang) +import UnicodeCollation.Lang (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), @@ -99,133 +99,136 @@ setDefaultLanguage = do polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList [ ("arabic", \o -> case T.filter (/=' ') o of - "locale=algeria" -> Lang "ar" "" "DZ" [] - "locale=mashriq" -> Lang "ar" "" "SY" [] - "locale=libya" -> Lang "ar" "" "LY" [] - "locale=morocco" -> Lang "ar" "" "MA" [] - "locale=mauritania" -> Lang "ar" "" "MR" [] - "locale=tunisia" -> Lang "ar" "" "TN" [] - _ -> Lang "ar" "" "" []) + "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] [] + "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] [] + "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] [] + "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] [] + "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] [] + "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] [] + _ -> Lang "ar" Nothing (Just "") [] [] []) , ("german", \o -> case T.filter (/=' ') o of - "spelling=old" -> Lang "de" "" "DE" ["1901"] + "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] [] "variant=austrian,spelling=old" - -> Lang "de" "" "AT" ["1901"] - "variant=austrian" -> Lang "de" "" "AT" [] + -> Lang "de" Nothing (Just "AT") ["1901"] [] [] + "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] [] "variant=swiss,spelling=old" - -> Lang "de" "" "CH" ["1901"] - "variant=swiss" -> Lang "de" "" "CH" [] - _ -> Lang "de" "" "" []) - , ("lsorbian", \_ -> Lang "dsb" "" "" []) + -> Lang "de" Nothing (Just "CH") ["1901"] [] [] + "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] [] + _ -> Lang "de" Nothing Nothing [] [] []) + , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] []) , ("greek", \o -> case T.filter (/=' ') o of - "variant=poly" -> Lang "el" "" "polyton" [] - "variant=ancient" -> Lang "grc" "" "" [] - _ -> Lang "el" "" "" []) + "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] [] + "variant=ancient" -> Lang "grc" Nothing Nothing [] [] [] + _ -> Lang "el" Nothing Nothing [] [] []) , ("english", \o -> case T.filter (/=' ') o of - "variant=australian" -> Lang "en" "" "AU" [] - "variant=canadian" -> Lang "en" "" "CA" [] - "variant=british" -> Lang "en" "" "GB" [] - "variant=newzealand" -> Lang "en" "" "NZ" [] - "variant=american" -> Lang "en" "" "US" [] - _ -> Lang "en" "" "" []) - , ("usorbian", \_ -> Lang "hsb" "" "" []) + "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] [] + "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] [] + "variant=british" -> Lang "en" Nothing (Just "GB") [] [] [] + "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] [] + "variant=american" -> Lang "en" Nothing (Just "US") [] [] [] + _ -> Lang "en" Nothing (Just "") [] [] []) + , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] []) , ("latin", \o -> case T.filter (/=' ') o of - "variant=classic" -> Lang "la" "" "" ["x-classic"] - _ -> Lang "la" "" "" []) - , ("slovenian", \_ -> Lang "sl" "" "" []) - , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) - , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) - , ("afrikaans", \_ -> Lang "af" "" "" []) - , ("amharic", \_ -> Lang "am" "" "" []) - , ("assamese", \_ -> Lang "as" "" "" []) - , ("asturian", \_ -> Lang "ast" "" "" []) - , ("bulgarian", \_ -> Lang "bg" "" "" []) - , ("bengali", \_ -> Lang "bn" "" "" []) - , ("tibetan", \_ -> Lang "bo" "" "" []) - , ("breton", \_ -> Lang "br" "" "" []) - , ("catalan", \_ -> Lang "ca" "" "" []) - , ("welsh", \_ -> Lang "cy" "" "" []) - , ("czech", \_ -> Lang "cs" "" "" []) - , ("coptic", \_ -> Lang "cop" "" "" []) - , ("danish", \_ -> Lang "da" "" "" []) - , ("divehi", \_ -> Lang "dv" "" "" []) - , ("esperanto", \_ -> Lang "eo" "" "" []) - , ("spanish", \_ -> Lang "es" "" "" []) - , ("estonian", \_ -> Lang "et" "" "" []) - , ("basque", \_ -> Lang "eu" "" "" []) - , ("farsi", \_ -> Lang "fa" "" "" []) - , ("finnish", \_ -> Lang "fi" "" "" []) - , ("french", \_ -> Lang "fr" "" "" []) - , ("friulan", \_ -> Lang "fur" "" "" []) - , ("irish", \_ -> Lang "ga" "" "" []) - , ("scottish", \_ -> Lang "gd" "" "" []) - , ("ethiopic", \_ -> Lang "gez" "" "" []) - , ("galician", \_ -> Lang "gl" "" "" []) - , ("hebrew", \_ -> Lang "he" "" "" []) - , ("hindi", \_ -> Lang "hi" "" "" []) - , ("croatian", \_ -> Lang "hr" "" "" []) - , ("magyar", \_ -> Lang "hu" "" "" []) - , ("armenian", \_ -> Lang "hy" "" "" []) - , ("interlingua", \_ -> Lang "ia" "" "" []) - , ("indonesian", \_ -> Lang "id" "" "" []) - , ("icelandic", \_ -> Lang "is" "" "" []) - , ("italian", \_ -> Lang "it" "" "" []) - , ("japanese", \_ -> Lang "jp" "" "" []) - , ("khmer", \_ -> Lang "km" "" "" []) - , ("kurmanji", \_ -> Lang "kmr" "" "" []) - , ("kannada", \_ -> Lang "kn" "" "" []) - , ("korean", \_ -> Lang "ko" "" "" []) - , ("lao", \_ -> Lang "lo" "" "" []) - , ("lithuanian", \_ -> Lang "lt" "" "" []) - , ("latvian", \_ -> Lang "lv" "" "" []) - , ("malayalam", \_ -> Lang "ml" "" "" []) - , ("mongolian", \_ -> Lang "mn" "" "" []) - , ("marathi", \_ -> Lang "mr" "" "" []) - , ("dutch", \_ -> Lang "nl" "" "" []) - , ("nynorsk", \_ -> Lang "nn" "" "" []) - , ("norsk", \_ -> Lang "no" "" "" []) - , ("nko", \_ -> Lang "nqo" "" "" []) - , ("occitan", \_ -> Lang "oc" "" "" []) - , ("panjabi", \_ -> Lang "pa" "" "" []) - , ("polish", \_ -> Lang "pl" "" "" []) - , ("piedmontese", \_ -> Lang "pms" "" "" []) - , ("portuguese", \_ -> Lang "pt" "" "" []) - , ("romansh", \_ -> Lang "rm" "" "" []) - , ("romanian", \_ -> Lang "ro" "" "" []) - , ("russian", \_ -> Lang "ru" "" "" []) - , ("sanskrit", \_ -> Lang "sa" "" "" []) - , ("samin", \_ -> Lang "se" "" "" []) - , ("slovak", \_ -> Lang "sk" "" "" []) - , ("albanian", \_ -> Lang "sq" "" "" []) - , ("serbian", \_ -> Lang "sr" "" "" []) - , ("swedish", \_ -> Lang "sv" "" "" []) - , ("syriac", \_ -> Lang "syr" "" "" []) - , ("tamil", \_ -> Lang "ta" "" "" []) - , ("telugu", \_ -> Lang "te" "" "" []) - , ("thai", \_ -> Lang "th" "" "" []) - , ("turkmen", \_ -> Lang "tk" "" "" []) - , ("turkish", \_ -> Lang "tr" "" "" []) - , ("ukrainian", \_ -> Lang "uk" "" "" []) - , ("urdu", \_ -> Lang "ur" "" "" []) - , ("vietnamese", \_ -> Lang "vi" "" "" []) + "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] [] + _ -> Lang "la" Nothing Nothing [] [] []) + , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] []) + , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] []) + , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] []) + , ("afrikaans", \_ -> simpleLang "af") + , ("amharic", \_ -> simpleLang "am") + , ("assamese", \_ -> simpleLang "as") + , ("asturian", \_ -> simpleLang "ast") + , ("bulgarian", \_ -> simpleLang "bg") + , ("bengali", \_ -> simpleLang "bn") + , ("tibetan", \_ -> simpleLang "bo") + , ("breton", \_ -> simpleLang "br") + , ("catalan", \_ -> simpleLang "ca") + , ("welsh", \_ -> simpleLang "cy") + , ("czech", \_ -> simpleLang "cs") + , ("coptic", \_ -> simpleLang "cop") + , ("danish", \_ -> simpleLang "da") + , ("divehi", \_ -> simpleLang "dv") + , ("esperanto", \_ -> simpleLang "eo") + , ("spanish", \_ -> simpleLang "es") + , ("estonian", \_ -> simpleLang "et") + , ("basque", \_ -> simpleLang "eu") + , ("farsi", \_ -> simpleLang "fa") + , ("finnish", \_ -> simpleLang "fi") + , ("french", \_ -> simpleLang "fr") + , ("friulan", \_ -> simpleLang "fur") + , ("irish", \_ -> simpleLang "ga") + , ("scottish", \_ -> simpleLang "gd") + , ("ethiopic", \_ -> simpleLang "gez") + , ("galician", \_ -> simpleLang "gl") + , ("hebrew", \_ -> simpleLang "he") + , ("hindi", \_ -> simpleLang "hi") + , ("croatian", \_ -> simpleLang "hr") + , ("magyar", \_ -> simpleLang "hu") + , ("armenian", \_ -> simpleLang "hy") + , ("interlingua", \_ -> simpleLang "ia") + , ("indonesian", \_ -> simpleLang "id") + , ("icelandic", \_ -> simpleLang "is") + , ("italian", \_ -> simpleLang "it") + , ("japanese", \_ -> simpleLang "jp") + , ("khmer", \_ -> simpleLang "km") + , ("kurmanji", \_ -> simpleLang "kmr") + , ("kannada", \_ -> simpleLang "kn") + , ("korean", \_ -> simpleLang "ko") + , ("lao", \_ -> simpleLang "lo") + , ("lithuanian", \_ -> simpleLang "lt") + , ("latvian", \_ -> simpleLang "lv") + , ("malayalam", \_ -> simpleLang "ml") + , ("mongolian", \_ -> simpleLang "mn") + , ("marathi", \_ -> simpleLang "mr") + , ("dutch", \_ -> simpleLang "nl") + , ("nynorsk", \_ -> simpleLang "nn") + , ("norsk", \_ -> simpleLang "no") + , ("nko", \_ -> simpleLang "nqo") + , ("occitan", \_ -> simpleLang "oc") + , ("panjabi", \_ -> simpleLang "pa") + , ("polish", \_ -> simpleLang "pl") + , ("piedmontese", \_ -> simpleLang "pms") + , ("portuguese", \_ -> simpleLang "pt") + , ("romansh", \_ -> simpleLang "rm") + , ("romanian", \_ -> simpleLang "ro") + , ("russian", \_ -> simpleLang "ru") + , ("sanskrit", \_ -> simpleLang "sa") + , ("samin", \_ -> simpleLang "se") + , ("slovak", \_ -> simpleLang "sk") + , ("albanian", \_ -> simpleLang "sq") + , ("serbian", \_ -> simpleLang "sr") + , ("swedish", \_ -> simpleLang "sv") + , ("syriac", \_ -> simpleLang "syr") + , ("tamil", \_ -> simpleLang "ta") + , ("telugu", \_ -> simpleLang "te") + , ("thai", \_ -> simpleLang "th") + , ("turkmen", \_ -> simpleLang "tk") + , ("turkish", \_ -> simpleLang "tr") + , ("ukrainian", \_ -> simpleLang "uk") + , ("urdu", \_ -> simpleLang "ur") + , ("vietnamese", \_ -> simpleLang "vi") ] +simpleLang :: Text -> Lang +simpleLang l = Lang l Nothing Nothing [] [] [] + babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of - "austrian" -> Just $ Lang "de" "" "AT" ["1901"] - "naustrian" -> Just $ Lang "de" "" "AT" [] - "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] - "nswissgerman" -> Just $ Lang "de" "" "CH" [] - "german" -> Just $ Lang "de" "" "DE" ["1901"] - "ngerman" -> Just $ Lang "de" "" "DE" [] - "lowersorbian" -> Just $ Lang "dsb" "" "" [] - "uppersorbian" -> Just $ Lang "hsb" "" "" [] - "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] - "slovene" -> Just $ Lang "sl" "" "" [] - "australian" -> Just $ Lang "en" "" "AU" [] - "canadian" -> Just $ Lang "en" "" "CA" [] - "british" -> Just $ Lang "en" "" "GB" [] - "newzealand" -> Just $ Lang "en" "" "NZ" [] - "american" -> Just $ Lang "en" "" "US" [] - "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] [] + "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] [] + "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] [] + "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] [] + "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] [] + "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] [] + "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] [] + "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] [] + "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] [] + "slovene" -> Just $ simpleLang "sl" + "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] [] + "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] [] + "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] [] + "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] [] + "american" -> Just $ Lang "en" Nothing (Just "US") [] [] [] + "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] [] _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs index b9ae0c13a..95de6b71f 100644 --- a/src/Text/Pandoc/Writers/BibTeX.hs +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text writeBibTeX' variant opts (Pandoc meta _) = do let mblang = case lookupMetaString "lang" meta of "" -> Nothing - t -> Just $ parseLang t + t -> either (const Nothing) Just $ parseLang t let refs = case lookupMeta "references" meta of Just (MetaList xs) -> mapMaybe metaValueToReference xs _ -> [] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3c9975be8..f352c84bc 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import Text.Pandoc.BCP47 +import UnicodeCollation.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes fromBCP47' :: Maybe Lang -> Maybe Text -fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" -fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" -fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" -fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" -fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" -fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" -fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" -fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" -fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" -fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" -fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" -fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" -fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" -fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" -fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" -fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" -fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" -fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" -fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" -fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" -fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" -fromBCP47' (Just (Lang l _ _ _) ) = Just l -fromBCP47' Nothing = Nothing +fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _)) = Just "deo" +fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de" +fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at" +fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _)) = Just "agr" +fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us" +fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _ _ _)) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _ _ _)) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _ _ _)) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _ _ _)) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _ _ _)) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _ _ _)) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _ _ _)) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _ _ _)) = Just "cn" +fromBCP47' (Just (Lang l _ _ _ _ _)) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index a10def95e..395335667 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -34,15 +34,16 @@ import Control.Monad.Identity import Citeproc.Locale (getLocale) import Citeproc.CslJson import Text.Pandoc.Options (WriterOptions) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, fromMaybe) import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces), NumberFormat (Generic), defConfig, encodePretty') writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCslJson _opts (Pandoc meta _) = do - let lang = maybe (Lang "en" (Just "US")) parseLang - (lookupMeta "lang" meta >>= metaValueToText) + let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] []) + (lookupMeta "lang" meta >>= metaValueToText >>= + either (const Nothing) Just . parseLang) locale <- case getLocale lang of Left e -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 20bcd0324..7781df8e7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import Text.Pandoc.BCP47 (getLang, renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 1c970e6ad..e99bad738 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,7 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 871b2692a..437b84120 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _) toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") toPolyglossia (Lang "de" _ (Just "CH") vars _ _) | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") toPolyglossia (Lang "el" _ _ vars _ _) @@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") toPolyglossia (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. @@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _) | "1901" `elem` vars = "german" | otherwise = "ngerman" toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" -toBabel (Lang "el" _ _ vars) +toBabel (Lang "el" _ _ vars _ _) | "polyton" `elem` vars = "polutonikogreek" toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 101b236aa..6fd4cdeb4 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -16,6 +16,7 @@ import Codec.Archive.Zip import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Maybe (fromMaybe) import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import qualified Data.Map as Map @@ -23,7 +24,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition @@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, - fixDisplayMath) + fixDisplayMath, getLang) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (langRegion lang) + = Attr n (fromMaybe "" $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index cf42f2228..6c265090c 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -25,7 +25,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition @@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do - let defLang = Lang "en" "US" "" [] + let defLang = Lang "en" (Just "US") Nothing [] [] [] lang <- case lookupMetaString "lang" meta of "" -> pure defLang s -> fromMaybe defLang <$> toLang (Just s) @@ -893,7 +893,7 @@ textStyleAttr m s Map.insert "style:font-name-complex" "Courier New" $ m | Language lang <- s = Map.insert "fo:language" (langLanguage lang) . - Map.insert "fo:country" (langRegion lang) $ m + maybe id (Map.insert "fo:country") (langRegion lang) $ m | otherwise = m withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a @@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> - case parseBCP47 l of + case parseLang l of Right lang -> withTextStyle (Language lang) action Left _ -> do report $ InvalidLang l diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index fcb47bd5a..a09d18571 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -149,7 +149,7 @@ defField field val (Context m) = f _newval oldval = oldval -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe Text +getLang :: WriterOptions -> Meta -> Maybe T.Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of Just s -> Just s |