From ac9423eccc76005f996a10a545594247ac753e02 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 21:00:35 +0200 Subject: Moved BCP47 specific functions from Writers.Shared to new module. Text.Pandoc.BCP47 (unexported, internal module). `getLang`, `Lang(..)`, `parseBCP47`. --- pandoc.cabal | 1 + src/Text/Pandoc/BCP47.hs | 117 ++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Docx.hs | 3 +- src/Text/Pandoc/Writers/ODT.hs | 4 +- src/Text/Pandoc/Writers/OpenDocument.hs | 1 + src/Text/Pandoc/Writers/Shared.hs | 87 +----------------------- 6 files changed, 126 insertions(+), 87 deletions(-) create mode 100644 src/Text/Pandoc/BCP47.hs diff --git a/pandoc.cabal b/pandoc.cabal index 3b644c7d0..5ae255284 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -463,6 +463,7 @@ Library Text.Pandoc.Lua.Util, Text.Pandoc.CSS, Text.Pandoc.UUID, + Text.Pandoc.BCP47 Text.Pandoc.Slides, Text.Pandoc.Compat.Time, Paths_pandoc diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs new file mode 100644 index 000000000..ae7f54473 --- /dev/null +++ b/src/Text/Pandoc/BCP47.hs @@ -0,0 +1,117 @@ +{- +Copyright (C) 2017 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.BCP47 + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for parsing and rendering BCP47 language identifiers. +-} +module Text.Pandoc.BCP47 ( + getLang + , parseBCP47 + , Lang(..) + , renderLang + ) +where +import Control.Monad (guard) +import Data.Char (isAscii, isLetter, isUpper, isLower) +import Data.List (intercalate) +import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import qualified Text.Parsec as P + +-- | Represents BCP 47 language/country code. +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) + +-- | Get the contents of the `lang` metadata field or variable. +getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) +getLang opts meta = case + (case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing) of + Nothing -> return Nothing + Just s -> case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Parse a BCP 47 string as a Lang. +parseBCP47 :: String -> Either String Lang +parseBCP47 lang = + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many pVariant + () <$ P.char '-' P.<|> P.eof + return $ Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return var diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 52ababb14..bc8568cd1 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,7 +67,8 @@ import Text.Pandoc.Shared hiding (Element) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, renderLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 54873efb2..98aa3b30b 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -50,8 +50,8 @@ import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, Lang(..), - renderLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang) import Text.Pandoc.XML import Text.TeXMath import Text.XML.Light diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 763cea5ad..6c53ab4ab 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Pandoc.XML +import Text.Pandoc.BCP47 (parseBCP47, Lang(..)) import Text.Printf (printf) -- | Auxiliary function to convert Plain block to Para. diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index b56f2d468..2047285eb 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -29,11 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( - getLang - , parseBCP47 - , Lang(..) - , renderLang - , metaToJSON + metaToJSON , metaToJSON' , addVariablesToJSON , getField @@ -46,97 +42,20 @@ module Text.Pandoc.Writers.Shared ( , gridTable ) where -import Control.Monad (liftM, zipWithM, guard) +import Control.Monad (liftM, zipWithM) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) -import Data.Char (isAscii, isLetter, isUpper, isLower) import qualified Data.HashMap.Strict as H -import Data.List (groupBy, intersperse, transpose, intercalate) +import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M import Data.Maybe (isJust) import qualified Data.Text as T import qualified Data.Traversable as Traversable import Text.Pandoc.Definition -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) -import qualified Text.Parsec as P - --- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: String - , langScript :: String - , langRegion :: String - , langVariants :: [String] } - deriving (Eq, Ord, Show) - --- | Render a Lang as BCP 47. -renderLang :: Lang -> String -renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) - ([langScript lang, langRegion lang] ++ langVariants lang)) - --- | Get the contents of the `lang` metadata field or variable. -getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) -getLang opts meta = case - (case lookup "lang" (writerVariables opts) of - Just s -> Just s - _ -> - case lookupMeta "lang" meta of - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing) of - Nothing -> return Nothing - Just s -> case parseBCP47 s of - Left _ -> do - report $ InvalidLang s - return Nothing - Right l -> return (Just l) - --- | Parse a BCP 47 string as a Lang. -parseBCP47 :: String -> Either String Lang -parseBCP47 lang = - case P.parse bcp47 "lang" lang of - Right r -> Right r - Left e -> Left $ show e - where bcp47 = do - language <- pLanguage - script <- P.option "" pScript - region <- P.option "" pRegion - variants <- P.many pVariant - () <$ P.char '-' P.<|> P.eof - return $ Lang{ langLanguage = language - , langScript = script - , langRegion = region - , langVariants = variants } - asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) - pLanguage = do - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return cs - pScript = P.try $ do - P.char '-' - x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) - xs <- P.count 3 - (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return (x:xs) - pRegion = P.try $ do - P.char '-' - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return cs - pVariant = P.try $ do - P.char '-' - ds <- P.option "" (P.count 1 P.digit) - cs <- P.many1 asciiLetter - let var = ds ++ cs - guard $ if null ds - then length var >= 5 && length var <= 8 - else length var == 4 - return var -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- cgit v1.2.3