diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-06-26 15:03:51 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-06-26 15:03:51 +0200 |
commit | 700a0843b2310c6b319bf34d2aebd8470cc76b40 (patch) | |
tree | d5423f4c89bb52d2acbd7e4e8f0b157a8f90f6fe /src/Text | |
parent | f09473eab70f3d540fe1586c0256336ab9679049 (diff) | |
download | pandoc-700a0843b2310c6b319bf34d2aebd8470cc76b40.tar.gz |
parseBCP47: Parse extensions and private-use as variants.
Even though officially they aren't. This suffices
for our purposes.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/BCP47.hs | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 16dd3a032..b4b55c5d4 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -36,7 +36,8 @@ module Text.Pandoc.BCP47 ( ) where import Control.Monad (guard) -import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower, + isAlphaNum) import Data.List (intercalate) import Text.Pandoc.Definition import Text.Pandoc.Class (PandocMonad, report) @@ -78,7 +79,9 @@ toLang (Just s) = return Nothing Right l -> return (Just l) --- | Parse a BCP 47 string as a Lang. +-- | Parse a BCP 47 string as a Lang. Currently we parse +-- extensions and private-use fields as "variants," even +-- though officially they aren't. parseBCP47 :: String -> Either String Lang parseBCP47 lang = case P.parse bcp47 "lang" lang of @@ -88,8 +91,8 @@ parseBCP47 lang = language <- pLanguage script <- P.option "" pScript region <- P.option "" pRegion - variants <- P.many pVariant - () <$ P.char '-' P.<|> P.eof + variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) + P.eof return $ Lang{ langLanguage = language , langScript = script , langRegion = region @@ -121,3 +124,16 @@ parseBCP47 lang = then length var >= 5 && length var <= 8 else length var == 4 return $ map toLower var + pExtension = P.try $ do + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 2 && length cs <= 8 + return $ map toLower cs + pPrivateUse = P.try $ do + P.char '-' + P.char 'x' + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 1 && length cs <= 8 + let var = "x-" ++ cs + return $ map toLower var |