diff options
Diffstat (limited to 'src/Text/Pandoc/BCP47.hs')
-rw-r--r-- | src/Text/Pandoc/BCP47.hs | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs new file mode 100644 index 000000000..ae7f54473 --- /dev/null +++ b/src/Text/Pandoc/BCP47.hs @@ -0,0 +1,117 @@ +{- +Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.BCP47 + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for parsing and rendering BCP47 language identifiers. +-} +module Text.Pandoc.BCP47 ( + getLang + , parseBCP47 + , Lang(..) + , renderLang + ) +where +import Control.Monad (guard) +import Data.Char (isAscii, isLetter, isUpper, isLower) +import Data.List (intercalate) +import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import qualified Text.Parsec as P + +-- | Represents BCP 47 language/country code. +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) + +-- | Get the contents of the `lang` metadata field or variable. +getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) +getLang opts meta = case + (case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing) of + Nothing -> return Nothing + Just s -> case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Parse a BCP 47 string as a Lang. +parseBCP47 :: String -> Either String Lang +parseBCP47 lang = + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many pVariant + () <$ P.char '-' P.<|> P.eof + return $ Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return var |