Remove Text.Pandoc.BCP47 module.

[API change] Use Lang from UnicodeCollation.Lang instead. This is a richer implementation of BCP 47.
author: John MacFarlane <jgm@berkeley.edu> 2021-04-11 21:28:48 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2021-04-17 16:15:14 -0700
commit: aecbf8156eb7c36c4b41de27797e262c23728db5 (patch)
tree: 2c9fe9de41a0f7037485dacee444b36cc2ccc110 /src/Text
parent: 7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 (diff)
download: pandoc-aecbf8156eb7c36c4b41de27797e262c23728db5.tar.gz
19 files changed, 198 insertions, 293 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6b45e5418..67d3cce7d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
@@ -200,8 +200,8 @@ convertWithOpts opts = do
                     Just f  -> readFileStrict f
 
     case lookupMetaString "lang" (optMetadata opts) of
-           ""      -> setTranslations $ Lang "en" "" "US" []
-           l       -> case parseBCP47 l of
+           ""      -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+           l       -> case parseLang l of
                            Left _   -> report $ InvalidLang l
                            Right l' -> setTranslations l'
 
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
deleted file mode 100644
index 1ecf0bf73..000000000
--- a/src/Text/Pandoc/BCP47.hs
+++ /dev/null
@@ -1,99 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing and rendering BCP47 language identifiers.
--}
-module Text.Pandoc.BCP47 (
-                       getLang
-                     , parseBCP47
-                     , Lang(..)
-                     , renderLang
-                     )
-where
-import Control.Monad (guard)
-import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.DocTemplates (FromContext(..))
-import qualified Data.Text as T
-import qualified Text.Parsec as P
-
--- | Represents BCP 47 language/country code.
-data Lang = Lang{ langLanguage :: T.Text
-                , langScript   :: T.Text
-                , langRegion   :: T.Text
-                , langVariants :: [T.Text] }
-                deriving (Eq, Ord, Show)
-
--- | Render a Lang as BCP 47.
-renderLang :: Lang -> T.Text
-renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
-                    ([langScript lang, langRegion lang] ++ langVariants lang))
-
--- | Parse a BCP 47 string as a Lang.  Currently we parse
--- extensions and private-use fields as "variants," even
--- though officially they aren't.
-parseBCP47 :: T.Text -> Either T.Text Lang
-parseBCP47 lang =
-  case P.parse bcp47 "lang" lang of
-       Right r -> Right r
-       Left e  -> Left $ T.pack $ show e
-  where bcp47 = do
-          language <- pLanguage
-          script <- P.option "" pScript
-          region <- P.option "" pRegion
-          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
-          P.eof
-          return Lang{   langLanguage = language
-                       , langScript = script
-                       , langRegion = region
-                       , langVariants = variants }
-        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
-        pLanguage = do
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toLower $ T.pack cs
-        pScript = P.try $ do
-          P.char '-'
-          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
-          xs <- P.count 3
-                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
-          return $ T.toLower $ T.pack (x:xs)
-        pRegion = P.try $ do
-          P.char '-'
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toUpper $ T.pack cs
-        pVariant = P.try $ do
-          P.char '-'
-          ds <- P.option "" (P.count 1 P.digit)
-          cs <- P.many1 asciiLetter
-          let var = ds ++ cs
-              lv = length var
-          guard $ if null ds
-                     then lv >= 5 && lv <= 8
-                     else lv == 4
-          return $ T.toLower $ T.pack var
-        pExtension = P.try $ do
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          let lcs = length cs
-          guard $ lcs >= 2 && lcs <= 8
-          return $ T.toLower $ T.pack cs
-        pPrivateUse = P.try $ do
-          P.char '-'
-          P.char 'x'
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          guard $ not (null cs) && length cs <= 8
-          let var = "x-" ++ cs
-          return $ T.toLower $ T.pack var
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index af302f782..c9f1806e4 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import qualified Text.Pandoc.BCP47 as BCP47
 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
@@ -630,13 +629,8 @@ removeFinalPeriod ils =
 
 bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
 bcp47LangToIETF bcplang =
-  case BCP47.parseBCP47 bcplang of
+  case parseLang bcplang of
     Left _ -> do
       report $ InvalidLang bcplang
       return Nothing
-    Right lang ->
-      return $ Just
-             $ Lang (BCP47.langLanguage lang)
-                    (if T.null (BCP47.langRegion lang)
-                        then Nothing
-                        else Just (BCP47.langRegion lang))
+    Right lang -> return $ Just lang
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index c0752dadc..510e56f9c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref =
                    [ (", " <>) <$> nameGiven name,
                      nameDroppingParticle name ]
 
-  mblang' = (parseLang <$> getVariableAsText "language") <|> mblang
+  mblang' = case getVariableAsText "language" of
+              Just l  -> either (const Nothing) Just $ parseLang l
+              Nothing -> mblang
 
   titlecase = case mblang' of
-                Just (Lang "en" _) -> titlecase'
+                Just lang | langLanguage lang == "en"
+                                   -> titlecase'
                 Nothing            -> titlecase'
                 _                  ->
                   case variant of
@@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref =
   renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
-defaultLang = Lang "en" (Just "US")
+defaultLang = Lang "en" Nothing (Just "US") [] [] []
 
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
@@ -351,9 +354,7 @@ itemToReference locale variant item = do
   bib item $ do
     let lang = fromMaybe defaultLang $ localeLanguage locale
     modify $ \st -> st{ localeLang = lang,
-                        untitlecase = case lang of
-                                           (Lang "en" _) -> True
-                                           _             -> False }
+                        untitlecase = langLanguage lang == "en" }
 
     id' <- asks identifier
     otherIds <- (Just <$> getRawField "ids")
@@ -711,7 +712,7 @@ itemToReference locale variant item = do
 
 
 bib :: Item -> Bib a -> BibParser a
-bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+bib entry m = fst <$> evalRWST m entry (BibState True defaultLang)
 
 resolveCrossRefs :: Variant -> [Item] -> [Item]
 resolveCrossRefs variant entries =
@@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
-resolveKey' lang@(Lang l _) k =
-  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+resolveKey' lang k =
+  case Map.lookup (langLanguage lang) biblatexStringMap >>=
+        Map.lookup (T.toLower k) of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 40430b0f5..388b9ba62 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ _ _ _ _ = parseLang
-                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
-        ls = T.lines $ TE.decodeUtf8 bs
-     in if length ls > 4
-           then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
-           else id
+    let ls = T.lines $ TE.decodeUtf8 bs
+     in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of
+          Right lang | length ls > 4
+            -> M.insert (langLanguage lang)
+                        (toStringMap $ map (T.splitOn "|") ls)
+          _ -> id
   toStringMap = foldr go' mempty
   go' [term, x, y] = M.insert term (x, y)
   go' _ = id
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 7e1735c2b..0fd094d99 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang)
+import UnicodeCollation.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 293a822a0..76f1fa32b 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError
 toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
 toLang Nothing = return Nothing
 toLang (Just s) =
-  case parseBCP47 s of
+  case parseLang s of
        Left _ -> do
          report $ InvalidLang s
          return Nothing
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 956b9f1f7..b82a81350 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
 readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 851756065..83caf742a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 08e217bdb..b92e6ab57 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import UnicodeCollation.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
@@ -99,133 +99,136 @@ setDefaultLanguage = do
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing (Just "")   [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing (Just "")   [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
index b9ae0c13a..95de6b71f 100644
--- a/src/Text/Pandoc/Writers/BibTeX.hs
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
 writeBibTeX' variant opts (Pandoc meta _) = do
   let mblang = case lookupMetaString "lang" meta of
                  "" -> Nothing
-                 t  -> Just $ parseLang t
+                 t  -> either (const Nothing) Just $ parseLang t
   let refs = case lookupMeta "references" meta of
                Just (MetaList xs) -> mapMaybe metaValueToReference xs
                _ -> []
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3c9975be8..f352c84bc 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47
+import UnicodeCollation.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs
 -- https://tools.ietf.org/html/bcp47#section-2.1
 -- http://wiki.contextgarden.net/Language_Codes
 fromBCP47' :: Maybe Lang -> Maybe Text
-fromBCP47' (Just (Lang "ar" _ "SY" _)     ) = Just "ar-sy"
-fromBCP47' (Just (Lang "ar" _ "IQ" _)     ) = Just "ar-iq"
-fromBCP47' (Just (Lang "ar" _ "JO" _)     ) = Just "ar-jo"
-fromBCP47' (Just (Lang "ar" _ "LB" _)     ) = Just "ar-lb"
-fromBCP47' (Just (Lang "ar" _ "DZ" _)     ) = Just "ar-dz"
-fromBCP47' (Just (Lang "ar" _ "MA" _)     ) = Just "ar-ma"
-fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
-fromBCP47' (Just (Lang "de" _ "DE" _)     ) = Just "de-de"
-fromBCP47' (Just (Lang "de" _ "AT" _)     ) = Just "de-at"
-fromBCP47' (Just (Lang "de" _ "CH" _)     ) = Just "de-ch"
-fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
-fromBCP47' (Just (Lang "en" _ "US" _)     ) = Just "en-us"
-fromBCP47' (Just (Lang "en" _ "GB" _)     ) = Just "en-gb"
-fromBCP47' (Just (Lang "grc"_  _ _)       ) = Just "agr"
-fromBCP47' (Just (Lang "el" _ _ _)        ) = Just "gr"
-fromBCP47' (Just (Lang "eu" _ _ _)        ) = Just "ba"
-fromBCP47' (Just (Lang "he" _ _ _)        ) = Just "il"
-fromBCP47' (Just (Lang "jp" _ _ _)        ) = Just "ja"
-fromBCP47' (Just (Lang "uk" _ _ _)        ) = Just "ua"
-fromBCP47' (Just (Lang "vi" _ _ _)        ) = Just "vn"
-fromBCP47' (Just (Lang "zh" _ _ _)        ) = Just "cn"
-fromBCP47' (Just (Lang l _ _ _)           ) = Just l
-fromBCP47' Nothing                          = Nothing
+fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _))    = Just "deo"
+fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _))    = Just "agr"
+fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _ _ _))           = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _ _ _))           = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _ _ _))           = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _ _ _))           = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _ _ _))           = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _ _ _))           = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _ _ _))           = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _ _ _))           = Just "cn"
+fromBCP47' (Just (Lang l _ _ _ _ _))              = Just l
+fromBCP47' Nothing                                = Nothing
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index a10def95e..395335667 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -34,15 +34,16 @@ import Control.Monad.Identity
 import Citeproc.Locale (getLocale)
 import Citeproc.CslJson
 import Text.Pandoc.Options (WriterOptions)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Aeson.Encode.Pretty         (Config (..), Indent (Spaces),
                                          NumberFormat (Generic),
                                          defConfig, encodePretty')
 
 writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCslJson _opts (Pandoc meta _) = do
-  let lang = maybe (Lang "en" (Just "US")) parseLang
-               (lookupMeta "lang" meta >>= metaValueToText)
+  let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] [])
+               (lookupMeta "lang" meta >>= metaValueToText >>=
+                  either (const Nothing) Just . parseLang)
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 20bcd0324..7781df8e7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import Text.Pandoc.BCP47 (getLang, renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 1c970e6ad..e99bad738 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 871b2692a..437b84120 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
 toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
 toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss")
 toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
 toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
 toPolyglossia (Lang "el" _ _ vars _ _)
@@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
 toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
 toPolyglossia (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars                   = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian")
 toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
+toPolyglossia x                               = (commonFromBcp47 x, "")
 
 -- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
@@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
 toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
+toBabel (Lang "el" _ _ vars _ _)
   | "polyton" `elem` vars               = "polutonikogreek"
 toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
 toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 101b236aa..6fd4cdeb4 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -16,6 +16,7 @@ import Codec.Archive.Zip
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
+import Data.Maybe (fromMaybe)
 import Data.Generics (everywhere', mkT)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
@@ -23,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
-                                   fixDisplayMath)
+                                   fixDisplayMath, getLang)
 import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
                            = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (langRegion lang)
+                           = Attr n (fromMaybe "" $ langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index cf42f2228..6c265090c 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
@@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of
 -- | Convert Pandoc document to string in OpenDocument format.
 writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeOpenDocument opts (Pandoc meta blocks) = do
-  let defLang = Lang "en" "US" "" []
+  let defLang = Lang "en" (Just "US") Nothing [] [] []
   lang <- case lookupMetaString "lang" meta of
             "" -> pure defLang
             s  -> fromMaybe defLang <$> toLang (Just s)
@@ -893,7 +893,7 @@ textStyleAttr m s
                     Map.insert "style:font-name-complex" "Courier New" $ m
     | Language lang <- s
                   = Map.insert "fo:language" (langLanguage lang) .
-                    Map.insert "fo:country" (langRegion lang) $ m
+                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
     | otherwise   = m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
@@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action =
   case lookup "lang" kvs of
        Nothing -> action
        Just l  ->
-         case parseBCP47 l of
+         case parseLang l of
               Right lang -> withTextStyle (Language lang) action
               Left _ -> do
                 report $ InvalidLang l
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fcb47bd5a..a09d18571 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -149,7 +149,7 @@ defField field val (Context m) =
     f _newval oldval = oldval
 
 -- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe Text
+getLang :: WriterOptions -> Meta -> Maybe T.Text
 getLang opts meta =
   case lookupContext "lang" (writerVariables opts) of
         Just s -> Just s
author	John MacFarlane <jgm@berkeley.edu>	2021-04-11 21:28:48 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2021-04-17 16:15:14 -0700
commit	aecbf8156eb7c36c4b41de27797e262c23728db5 (patch)
tree	2c9fe9de41a0f7037485dacee444b36cc2ccc110 /src/Text
parent	7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 (diff)
download	pandoc-aecbf8156eb7c36c4b41de27797e262c23728db5.tar.gz