aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/BCP47.hs
blob: a9fb5c7a71f8ff511d3b8756eae3fbfc6e5d9a4e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
{-
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.BCP47
   Copyright   : Copyright (C) 2017 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
                       getLang
                     , parseBCP47
                     , Lang(..)
                     , renderLang
                     )
where
import Control.Monad (guard)
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower,
                  toUpper)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import qualified Text.Parsec as P

-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: String
                , langScript   :: String
                , langRegion   :: String
                , langVariants :: [String] }
                deriving (Eq, Ord, Show)

-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
                    ([langScript lang, langRegion lang] ++ langVariants lang))

-- | Get the contents of the `lang` metadata field or variable.
getLang :: WriterOptions -> Meta -> Maybe String
getLang opts meta =
  case lookup "lang" (writerVariables opts) of
        Just s -> Just s
        _      ->
          case lookupMeta "lang" meta of
               Just (MetaInlines [Str s]) -> Just s
               Just (MetaString s)        -> Just s
               _                          -> Nothing

-- | Parse a BCP 47 string as a Lang.  Currently we parse
-- extensions and private-use fields as "variants," even
-- though officially they aren't.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
  case P.parse bcp47 "lang" lang of
       Right r -> Right r
       Left e  -> Left $ show e
  where bcp47 = do
          language <- pLanguage
          script <- P.option "" pScript
          region <- P.option "" pRegion
          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
          P.eof
          return Lang{   langLanguage = language
                       , langScript = script
                       , langRegion = region
                       , langVariants = variants }
        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
        pLanguage = do
          cs <- P.many1 asciiLetter
          let lcs = length cs
          guard $ lcs == 2 || lcs == 3
          return $ map toLower cs
        pScript = P.try $ do
          P.char '-'
          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
          xs <- P.count 3
                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
          return $ map toLower (x:xs)
        pRegion = P.try $ do
          P.char '-'
          cs <- P.many1 asciiLetter
          let lcs = length cs
          guard $ lcs == 2 || lcs == 3
          return $ map toUpper cs
        pVariant = P.try $ do
          P.char '-'
          ds <- P.option "" (P.count 1 P.digit)
          cs <- P.many1 asciiLetter
          let var = ds ++ cs
          guard $ if null ds
                     then length var >= 5 && length var <= 8
                     else length var == 4
          return $ map toLower var
        pExtension = P.try $ do
          P.char '-'
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
          guard $ length cs >= 2 && length cs <= 8
          return $ map toLower cs
        pPrivateUse = P.try $ do
          P.char '-'
          P.char 'x'
          P.char '-'
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
          guard $ not (null cs) && length cs <= 8
          let var = "x-" ++ cs
          return $ map toLower var