aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/BCP47.hs
blob: 69824aa57d37b7ad8418db2299f5b55385c4d295 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.BCP47
   Copyright   : Copyright (C) 2017-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
                       getLang
                     , parseBCP47
                     , Lang(..)
                     , renderLang
                     )
where
import Control.Monad (guard)
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.DocTemplates (FromContext(..))
import qualified Data.Text as T
import qualified Text.Parsec as P

-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: T.Text
                , langScript   :: T.Text
                , langRegion   :: T.Text
                , langVariants :: [T.Text] }
                deriving (Eq, Ord, Show)

-- | Render a Lang as BCP 47.
renderLang :: Lang -> T.Text
renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
                    ([langScript lang, langRegion lang] ++ langVariants lang))

-- | Get the contents of the `lang` metadata field or variable.
getLang :: WriterOptions -> Meta -> Maybe T.Text
getLang opts meta =
  case lookupContext "lang" (writerVariables opts) of
        Just s -> Just s
        _      ->
          case lookupMeta "lang" meta of
               Just (MetaBlocks [Para [Str s]])  -> Just s
               Just (MetaBlocks [Plain [Str s]]) -> Just s
               Just (MetaInlines [Str s])        -> Just s
               Just (MetaString s)               -> Just s
               _                                 -> Nothing

-- | Parse a BCP 47 string as a Lang.  Currently we parse
-- extensions and private-use fields as "variants," even
-- though officially they aren't.
parseBCP47 :: T.Text -> Either T.Text Lang
parseBCP47 lang =
  case P.parse bcp47 "lang" lang of
       Right r -> Right r
       Left e  -> Left $ T.pack $ show e
  where bcp47 = do
          language <- pLanguage
          script <- P.option "" pScript
          region <- P.option "" pRegion
          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
          P.eof
          return Lang{   langLanguage = language
                       , langScript = script
                       , langRegion = region
                       , langVariants = variants }
        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
        pLanguage = do
          cs <- P.many1 asciiLetter
          let lcs = length cs
          guard $ lcs == 2 || lcs == 3
          return $ T.toLower $ T.pack cs
        pScript = P.try $ do
          P.char '-'
          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
          xs <- P.count 3
                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
          return $ T.toLower $ T.pack (x:xs)
        pRegion = P.try $ do
          P.char '-'
          cs <- P.many1 asciiLetter
          let lcs = length cs
          guard $ lcs == 2 || lcs == 3
          return $ T.toUpper $ T.pack cs
        pVariant = P.try $ do
          P.char '-'
          ds <- P.option "" (P.count 1 P.digit)
          cs <- P.many1 asciiLetter
          let var = ds ++ cs
              lv = length var
          guard $ if null ds
                     then lv >= 5 && lv <= 8
                     else lv == 4
          return $ T.toLower $ T.pack var
        pExtension = P.try $ do
          P.char '-'
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
          let lcs = length cs
          guard $ lcs >= 2 && lcs <= 8
          return $ T.toLower $ T.pack cs
        pPrivateUse = P.try $ do
          P.char '-'
          P.char 'x'
          P.char '-'
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
          guard $ not (null cs) && length cs <= 8
          let var = "x-" ++ cs
          return $ T.toLower $ T.pack var