aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-06-26 15:03:51 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-06-26 15:03:51 +0200
commit700a0843b2310c6b319bf34d2aebd8470cc76b40 (patch)
treed5423f4c89bb52d2acbd7e4e8f0b157a8f90f6fe /src/Text
parentf09473eab70f3d540fe1586c0256336ab9679049 (diff)
downloadpandoc-700a0843b2310c6b319bf34d2aebd8470cc76b40.tar.gz
parseBCP47: Parse extensions and private-use as variants.
Even though officially they aren't. This suffices for our purposes.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/BCP47.hs24
1 files changed, 20 insertions, 4 deletions
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index 16dd3a032..b4b55c5d4 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -36,7 +36,8 @@ module Text.Pandoc.BCP47 (
)
where
import Control.Monad (guard)
-import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower)
+import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower,
+ isAlphaNum)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, report)
@@ -78,7 +79,9 @@ toLang (Just s) =
return Nothing
Right l -> return (Just l)
--- | Parse a BCP 47 string as a Lang.
+-- | Parse a BCP 47 string as a Lang. Currently we parse
+-- extensions and private-use fields as "variants," even
+-- though officially they aren't.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
case P.parse bcp47 "lang" lang of
@@ -88,8 +91,8 @@ parseBCP47 lang =
language <- pLanguage
script <- P.option "" pScript
region <- P.option "" pRegion
- variants <- P.many pVariant
- () <$ P.char '-' P.<|> P.eof
+ variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
+ P.eof
return $ Lang{ langLanguage = language
, langScript = script
, langRegion = region
@@ -121,3 +124,16 @@ parseBCP47 lang =
then length var >= 5 && length var <= 8
else length var == 4
return $ map toLower var
+ pExtension = P.try $ do
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ length cs >= 2 && length cs <= 8
+ return $ map toLower cs
+ pPrivateUse = P.try $ do
+ P.char '-'
+ P.char 'x'
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ length cs >= 1 && length cs <= 8
+ let var = "x-" ++ cs
+ return $ map toLower var