aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/BCP47.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/BCP47.hs')
-rw-r--r--src/Text/Pandoc/BCP47.hs117
1 files changed, 117 insertions, 0 deletions
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
new file mode 100644
index 000000000..ae7f54473
--- /dev/null
+++ b/src/Text/Pandoc/BCP47.hs
@@ -0,0 +1,117 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.BCP47
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing and rendering BCP47 language identifiers.
+-}
+module Text.Pandoc.BCP47 (
+ getLang
+ , parseBCP47
+ , Lang(..)
+ , renderLang
+ )
+where
+import Control.Monad (guard)
+import Data.Char (isAscii, isLetter, isUpper, isLower)
+import Data.List (intercalate)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import qualified Text.Parsec as P
+
+-- | Represents BCP 47 language/country code.
+data Lang = Lang{ langLanguage :: String
+ , langScript :: String
+ , langRegion :: String
+ , langVariants :: [String] }
+ deriving (Eq, Ord, Show)
+
+-- | Render a Lang as BCP 47.
+renderLang :: Lang -> String
+renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
+ ([langScript lang, langRegion lang] ++ langVariants lang))
+
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
+getLang opts meta = case
+ (case lookup "lang" (writerVariables opts) of
+ Just s -> Just s
+ _ ->
+ case lookupMeta "lang" meta of
+ Just (MetaInlines [Str s]) -> Just s
+ Just (MetaString s) -> Just s
+ _ -> Nothing) of
+ Nothing -> return Nothing
+ Just s -> case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
+
+-- | Parse a BCP 47 string as a Lang.
+parseBCP47 :: String -> Either String Lang
+parseBCP47 lang =
+ case P.parse bcp47 "lang" lang of
+ Right r -> Right r
+ Left e -> Left $ show e
+ where bcp47 = do
+ language <- pLanguage
+ script <- P.option "" pScript
+ region <- P.option "" pRegion
+ variants <- P.many pVariant
+ () <$ P.char '-' P.<|> P.eof
+ return $ Lang{ langLanguage = language
+ , langScript = script
+ , langRegion = region
+ , langVariants = variants }
+ asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
+ pLanguage = do
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return cs
+ pScript = P.try $ do
+ P.char '-'
+ x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
+ xs <- P.count 3
+ (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
+ return (x:xs)
+ pRegion = P.try $ do
+ P.char '-'
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return cs
+ pVariant = P.try $ do
+ P.char '-'
+ ds <- P.option "" (P.count 1 P.digit)
+ cs <- P.many1 asciiLetter
+ let var = ds ++ cs
+ guard $ if null ds
+ then length var >= 5 && length var <= 8
+ else length var == 4
+ return var