From 700a0843b2310c6b319bf34d2aebd8470cc76b40 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Jun 2017 15:03:51 +0200 Subject: parseBCP47: Parse extensions and private-use as variants. Even though officially they aren't. This suffices for our purposes. --- src/Text/Pandoc/BCP47.hs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 16dd3a032..b4b55c5d4 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -36,7 +36,8 @@ module Text.Pandoc.BCP47 ( ) where import Control.Monad (guard) -import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower, + isAlphaNum) import Data.List (intercalate) import Text.Pandoc.Definition import Text.Pandoc.Class (PandocMonad, report) @@ -78,7 +79,9 @@ toLang (Just s) = return Nothing Right l -> return (Just l) --- | Parse a BCP 47 string as a Lang. +-- | Parse a BCP 47 string as a Lang. Currently we parse +-- extensions and private-use fields as "variants," even +-- though officially they aren't. parseBCP47 :: String -> Either String Lang parseBCP47 lang = case P.parse bcp47 "lang" lang of @@ -88,8 +91,8 @@ parseBCP47 lang = language <- pLanguage script <- P.option "" pScript region <- P.option "" pRegion - variants <- P.many pVariant - () <$ P.char '-' P.<|> P.eof + variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) + P.eof return $ Lang{ langLanguage = language , langScript = script , langRegion = region @@ -121,3 +124,16 @@ parseBCP47 lang = then length var >= 5 && length var <= 8 else length var == 4 return $ map toLower var + pExtension = P.try $ do + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 2 && length cs <= 8 + return $ map toLower cs + pPrivateUse = P.try $ do + P.char '-' + P.char 'x' + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 1 && length cs <= 8 + let var = "x-" ++ cs + return $ map toLower var -- cgit v1.2.3