aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README20
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs40
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs148
3 files changed, 192 insertions, 16 deletions
diff --git a/README b/README
index 5a75c5a0b..da9bfea03 100644
--- a/README
+++ b/README
@@ -945,7 +945,19 @@ as `title`, `author`, and `date`) as well as the following:
: body of document
`lang`
-: language code for HTML or LaTeX documents
+: The `lang` variable should be set by the user to a language
+ code according to [BCP 47] (e.g. `en` or `en-GB`).
+ For some output formats, pandoc will convert it to an appropriate
+ format stored in the additional variables `babel-lang`,
+ `polyglossia-lang`, `polyglossia-variant` (LaTeX)
+ and `context-lang` (ConTeXt).
+
+`otherlangs`
+: Should be set to a list of other languages used in the document
+ in the YAML metadata, according to [BCP 47]. For example:
+ `otherlangs: [en-GB, fr]`.
+ Currently only used by XeTeX through the generated
+ `polyglossia-otherlangs` variable.
`slidy-url`
: base URL for Slidy documents (defaults to
@@ -3264,8 +3276,8 @@ The following fields are recognized:
~ A string value in `YYYY-MM-DD` format. (Only the year is necessary.)
Pandoc will attempt to convert other common date formats.
-`language`
- ~ A string value in [RFC5646] format. Pandoc will default to the local
+`lang` (or legacy: `language`)
+ ~ A string value in [BCP 47] format. Pandoc will default to the local
language if nothing is specified.
`subject`
@@ -3549,7 +3561,7 @@ Xavier Olive.
[FictionBook2]: http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1
[lua]: http://www.lua.org
[marc relators]: http://www.loc.gov/marc/relators/relaterm.html
-[RFC5646]: http://tools.ietf.org/html/rfc5646
+[BCP 47]: https://tools.ietf.org/html/bcp47
[InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
[txt2tags]: http://txt2tags.org/
[EPUB]: http://idpf.org/epub
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 1f8bbcdba..5e2d7cfee 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -80,12 +80,12 @@ pandocToConTeXt options (Pandoc meta blocks) = do
"subsubsubsection","subsubsubsubsection"])
$ defField "body" main
$ defField "number-sections" (writerNumberSections options)
- $ defField "mainlang" (maybe ""
- (reverse . takeWhile (/=',') . reverse)
- (lookup "lang" $ writerVariables options))
$ metadata
+ let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
+ getField "lang" context)
+ context
return $ if writerStandalone options
- then renderTemplate' (writerTemplate options) context
+ then renderTemplate' (writerTemplate options) context'
else main
-- escape things as needed for ConTeXt
@@ -362,3 +362,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
then char '\\' <> chapter <> braces contents
else contents <> blankline
+-- | Translate the subtags of a BCP 47 language code into the language code
+-- used by ConTeXt, covering the cases where ConTeXt deviates from BCP 47.
+-- https://tools.ietf.org/html/bcp47#section-2.1
+-- http://wiki.contextgarden.net/Language_Codes
+fromBcp47 :: [String] -> String
+fromBcp47 tags = case tags of
+  []                                            -> ""
+  l:r:_ | Just code <- lookup (l, r) twoSubtags -> code
+  "grc":_                                       -> "agr"
+  l:_   | Just code <- lookup l renamed         -> code
+        | otherwise                             -> l
+  where
+    twoSubtags =
+      [ (("ar", "SY"), "ar-sy")
+      , (("ar", "IQ"), "ar-iq")
+      , (("ar", "JO"), "ar-jo")
+      , (("ar", "LB"), "ar-lb")
+      , (("ar", "DZ"), "ar-dz")
+      , (("ar", "MA"), "ar-ma")
+      , (("de", "1901"), "deo")
+      , (("de", "DE"), "de-de")
+      , (("de", "AT"), "de-at")
+      , (("de", "CH"), "de-ch")
+      , (("el", "poly"), "agr")
+      , (("en", "US"), "en-us")
+      , (("en", "GB"), "en-gb") ]
+    renamed =
+      [ ("cz", "cs"), ("el", "gr")
+      , ("eu", "ba"), ("he", "il")
+      , ("jp", "ja"), ("uk", "ua")
+      , ("vi", "vn"), ("zh", "cn") ]
+
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 506edd182..6a30efbf5 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -144,11 +144,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
- let (mainlang, otherlang) =
- case (reverse . splitBy (==',') . filter (/=' ')) `fmap`
- getField "lang" metadata of
- Just (m:os) -> (m, reverse os)
- _ -> ("", [])
let context = defField "toc" (writerTableOfContents options) $
defField "toc-depth" (show (writerTOCDepth options -
if stBook st
@@ -173,8 +168,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "euro" (stUsesEuro st) $
defField "listings" (writerListings options || stLHS st) $
defField "beamer" (writerBeamer options) $
- defField "mainlang" mainlang $
- defField "otherlang" otherlang $
(if stHighlighting st
then defField "highlighting-macros" (styleToLaTeX
$ writerHighlightStyle options )
@@ -186,8 +179,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "biblatex" True
_ -> id) $
metadata
+ let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
+ (polyLang, polyVar) = toPolyglossia lang
+ let context' =
+ defField "babel-lang" (toBabel lang)
+ $ defField "polyglossia-lang" polyLang
+ $ defField "polyglossia-variant" polyVar
+ $ defField "polyglossia-otherlangs"
+ (maybe [] (map $ fst . toPolyglossia . splitBy (=='-')) $
+ getField "otherlangs" context)
+ $ context
return $ if writerStandalone options
- then renderTemplate' template context
+ then renderTemplate' template context'
else main
-- | Convert Elements to LaTeX
@@ -980,3 +983,132 @@ citationsToBiblatex _ = return empty
getListingsLanguage :: [String] -> Maybe String
getListingsLanguage [] = Nothing
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
+
+-- | Map the subtags of a BCP 47 language code onto a Polyglossia
+-- @(language, variant)@ pair; the variant is @""@ when none is selected.
+-- Codes without a special case here are resolved by 'commonFromBcp47'.
+-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf
+toPolyglossia :: [String] -> (String, String)
+toPolyglossia tags = case tags of
+  l:v:_ | Just pair <- lookup (l, v) bySubtag -> pair
+  l:_   | Just pair <- lookup l byLanguage    -> pair
+  _                                           -> (commonFromBcp47 tags, "")
+  where
+    bySubtag =
+      [ (("de", "AT"), ("german", "austrian")), (("de", "CH"), ("german", "swiss"))
+      , (("el", "poly"), ("greek", "poly")), (("en", "AU"), ("english", "australian"))
+      , (("en", "CA"), ("english", "canadian")), (("en", "GB"), ("english", "british"))
+      , (("en", "NZ"), ("english", "newzealand")), (("en", "UK"), ("english", "british"))
+      , (("en", "US"), ("english", "american")) ]
+    byLanguage =
+      [ ("de", ("german", "")), ("dsb", ("lsorbian", "")), ("grc", ("greek", "ancient"))
+      , ("hsb", ("usorbian", "")), ("sl", ("slovenian", "")) ]
+
+-- | Map the subtags of a BCP 47 language code onto a Babel language name.
+-- Codes without a special case here are resolved by 'commonFromBcp47'.
+-- http://mirrors.concertpass.com/tex-archive/macros/latex/required/babel/base/babel.pdf
+-- Note that the PDF unfortunately does not contain a complete list of supported languages.
+toBabel :: [String] -> String
+toBabel tags = case tags of
+  "de":"1901":_      -> "german"
+  "de":"AT":"1901":_ -> "austrian"
+  "de":"AT":_        -> "naustrian"
+  "de":_             -> "ngerman"
+  "dsb":_            -> "lowersorbian"
+  "el":"poly":_      -> "polutonikogreek"
+  "en":r:_ | Just name <- lookup r englishVariants -> name
+  "fr":"CA":_        -> "canadien"
+  "fra":"aca":_      -> "acadian"
+  "grc":_            -> "polutonikogreek"
+  "hsb":_            -> "uppersorbian"
+  "sl":_             -> "slovene"
+  _                  -> commonFromBcp47 tags
+  where
+    englishVariants =
+      [ ("AU", "australian"), ("CA", "canadian"), ("GB", "british")
+      , ("NZ", "newzealand"), ("UK", "british"), ("US", "american") ]
+
+-- | Convert the subtags of a BCP 47 language code to the language name shared
+-- by Babel and Polyglossia; yields @""@ for an empty or unrecognised code.
+-- https://tools.ietf.org/html/bcp47#section-2.1
+commonFromBcp47 :: [String] -> String
+commonFromBcp47 [] = ""
+commonFromBcp47 ("pt":"BR":_) = "brazilian"
+commonFromBcp47 (l:_) = fromIso l  -- only the primary language subtag is used
+  where
+    fromIso "af" = "afrikaans"
+    fromIso "am" = "amharic"
+    fromIso "ar" = "arabic"
+    fromIso "ast" = "asturian"
+    fromIso "bg" = "bulgarian"
+    fromIso "bn" = "bengali"
+    fromIso "bo" = "tibetan"
+    fromIso "br" = "breton"
+    fromIso "ca" = "catalan"
+    fromIso "cy" = "welsh"
+    fromIso c | c `elem` ["cs", "cz"] = "czech"  -- "cs" is ISO 639-1; "cz" kept as legacy alias
+    fromIso "cop" = "coptic"
+    fromIso "da" = "danish"
+    fromIso "dv" = "divehi"
+    fromIso "el" = "greek"
+    fromIso "en" = "english"
+    fromIso "eo" = "esperanto"
+    fromIso "es" = "spanish"
+    fromIso "et" = "estonian"
+    fromIso "eu" = "basque"
+    fromIso "fa" = "farsi"
+    fromIso "fi" = "finnish"
+    fromIso "fr" = "french"
+    fromIso "fur" = "friulan"
+    fromIso "ga" = "irish"
+    fromIso "gd" = "scottish"
+    fromIso "gl" = "galician"
+    fromIso "he" = "hebrew"
+    fromIso "hi" = "hindi"
+    fromIso "hr" = "croatian"
+    fromIso "hy" = "armenian"
+    fromIso "hu" = "magyar"
+    fromIso "ia" = "interlingua"
+    fromIso "id" = "indonesian"
+    fromIso "ie" = "interlingua"
+    fromIso "is" = "icelandic"
+    fromIso "it" = "italian"
+    fromIso c | c `elem` ["ja", "jp"] = "japanese"  -- "ja" is ISO 639-1; "jp" kept as legacy alias
+    fromIso "km" = "khmer"
+    fromIso "kn" = "kannada"
+    fromIso "ko" = "korean"
+    fromIso "la" = "latin"
+    fromIso "lo" = "lao"
+    fromIso "lt" = "lithuanian"
+    fromIso "lv" = "latvian"
+    fromIso "ml" = "malayalam"
+    fromIso "mn" = "mongolian"
+    fromIso "mr" = "marathi"
+    fromIso "nb" = "norsk"
+    fromIso "nl" = "dutch"
+    fromIso "nn" = "nynorsk"
+    fromIso "no" = "norsk"
+    fromIso "nqo" = "nko"
+    fromIso "oc" = "occitan"
+    fromIso "pl" = "polish"
+    fromIso "pms" = "piedmontese"
+    fromIso "pt" = "portuguese"
+    fromIso "rm" = "romansh"
+    fromIso "ro" = "romanian"
+    fromIso "ru" = "russian"
+    fromIso "sa" = "sanskrit"
+    fromIso "se" = "samin"
+    fromIso "sk" = "slovak"
+    fromIso "sq" = "albanian"
+    fromIso "sr" = "serbian"
+    fromIso "sv" = "swedish"
+    fromIso "syr" = "syriac"
+    fromIso "ta" = "tamil"
+    fromIso "te" = "telugu"
+    fromIso "th" = "thai"
+    fromIso "tk" = "turkmen"
+    fromIso "tr" = "turkish"
+    fromIso "uk" = "ukrainian"
+    fromIso "ur" = "urdu"
+    fromIso "vi" = "vietnamese"
+    fromIso _ = ""