diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-08-11 11:56:54 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-08-11 22:22:31 -0700 |
commit | 74212eb1b0e1757fc0ac3e5d45b0ee18bac491e5 (patch) | |
tree | 522b79da10f8838b0bd2665ddfa741eed0ecbeff /src/Text/Pandoc/Class.hs | |
parent | 7892dcd353ff0efd24ed753061b9be8e556b0177 (diff) | |
download | pandoc-74212eb1b0e1757fc0ac3e5d45b0ee18bac491e5.tar.gz |
Added support for translations (localization) (see #3559).
* readDataFile, readDefaultDataFile, getReferenceDocx,
getReferenceODT have been removed from Shared and
moved into Class. They are now defined in terms of
PandocMonad primitives, rather than being primitve
methods of the class.
* toLang has been moved from BCP47 to Class.
* NoTranslation and CouldNotLoudTranslations have
been added to LogMessage.
* New module, Text.Pandoc.Translations, exporting
Term, Translations, readTranslations.
* New functions in Class: translateTerm, setTranslations.
Note that nothing is loaded from data files until
translateTerm is used; setTranslation just sets the
language to be used.
* Added two translation data files in data/translations.
* LaTeX reader: Support `\setmainlanguage` or `\setdefaultlanguage`
(polyglossia) and `\figurename`.
Diffstat (limited to 'src/Text/Pandoc/Class.hs')
-rw-r--r-- | src/Text/Pandoc/Class.hs | 258 |
1 files changed, 212 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 4697177ed..a3dd9ad58 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} @@ -71,8 +72,15 @@ module Text.Pandoc.Class ( PandocMonad(..) , runIO , runIOorExplode , runPure + , readDefaultDataFile + , readDataFile , fillMediaBag , extractMedia + , toLang + , setTranslations + , translateTerm + , Translations(..) + , Term(..) ) where import Prelude hiding (readFile) @@ -81,10 +89,9 @@ import qualified System.Random as IO (newStdGen) import Codec.Archive.Zip (Archive, fromArchive, emptyArchive) import Data.Unique (hashUnique) import qualified Data.Unique as IO (newUnique) -import qualified Text.Pandoc.Shared as IO ( readDataFile - , readDefaultDataFile - , openURL ) +import qualified Text.Pandoc.Shared as Shared import qualified Text.Pandoc.UTF8 as UTF8 +import qualified System.Directory as Directory import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) @@ -98,6 +105,7 @@ import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds , posixSecondsToUTCTime , POSIXTime ) import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) +import qualified System.FilePath.Posix as Posix import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) @@ -111,7 +119,7 @@ import qualified System.Environment as IO (lookupEnv) import System.FilePath.Glob (match, compile) import System.Directory (createDirectoryIfMissing, getDirectoryContents, doesDirectoryExist) -import System.FilePath ((</>), (<.>), takeDirectory, +import System.FilePath ((</>), (<.>), takeDirectory, splitDirectories, takeExtension, dropExtension, isRelative, normalise) import qualified System.FilePath.Glob as IO (glob) import qualified System.Directory as IO (getModificationTime) @@ -121,13 +129,21 @@ import Control.Monad.State.Strict import Control.Monad.Except import Control.Monad.Writer (WriterT) import Control.Monad.RWS (RWST) +import Codec.Archive.Zip import Data.Word (Word8) import Data.Default import System.IO.Error import System.IO (stderr) import qualified Data.Map as M import Text.Pandoc.Error +import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import Text.Pandoc.Translations (Term(..), Translations(..), readTranslations) import qualified Debug.Trace +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data (dataFiles) +#else +import Paths_pandoc (getDataFileName) +#endif -- | The PandocMonad typeclass contains all the potentially -- IO-related functions used in pandoc's readers and writers. @@ -155,15 +171,12 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) -- | Read the strict ByteString contents from a file path, -- raising an error on failure. readFileStrict :: FilePath -> m B.ByteString - -- | Read file from from Cabal data directory. - readDefaultDataFile :: FilePath -> m B.ByteString - -- | Read file from user data directory or, - -- if not found there, from Cabal data directory. - readDataFile :: FilePath -> m B.ByteString -- | Return a list of paths that match a glob, relative to -- the working directory. See 'System.FilePath.Glob' for -- the glob syntax. glob :: String -> m [FilePath] + -- | Returns True if file exists. + fileExists :: FilePath -> m Bool -- | Return the modification time of a file. getModificationTime :: FilePath -> m UTCTime -- | Get the value of the 'CommonState' used by all instances @@ -272,6 +285,9 @@ data CommonState = CommonState { stLog :: [LogMessage] -- ^ Directory to search for data files , stMediaBag :: MediaBag -- ^ Media parsed from binary containers + , stTranslations :: Maybe + (Lang, Maybe Translations) + -- ^ Translations for localization , stInputFiles :: Maybe [FilePath] -- ^ List of input files from command line , stOutputFile :: Maybe FilePath @@ -290,6 +306,7 @@ instance Default CommonState where def = CommonState { stLog = [] , stUserDataDir = Nothing , stMediaBag = mempty + , stTranslations = Nothing , stInputFiles = Nothing , stOutputFile = Nothing , stResourcePath = ["."] @@ -297,6 +314,71 @@ instance Default CommonState where , stTrace = False } +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Select the language to use with 'translateTerm'. +-- Note that this does not read a translation file; +-- that is only done the first time 'translateTerm' is +-- used. +setTranslations :: PandocMonad m => Lang -> m () +setTranslations lang = + modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } + +-- | Load term map. +getTranslations :: PandocMonad m => m Translations +getTranslations = do + mbtrans <- getsCommonState stTranslations + case mbtrans of + Nothing -> return mempty -- no language defined + Just (_, Just t) -> return t + Just (lang, Nothing) -> do -- read from file + let translationFile = "translations/" ++ renderLang lang ++ ".trans" + let fallbackFile = "translations/" ++ langLanguage lang ++ ".trans" + let getTrans bs = + case readTranslations (UTF8.toString bs) of + Left e -> do + report $ CouldNotLoadTranslations (renderLang lang) e + -- make sure we don't try again... + modifyCommonState $ \st -> + st{ stTranslations = Nothing } + return mempty + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + catchError (readDataFile translationFile >>= getTrans) + (\_ -> + catchError (readDataFile fallbackFile >>= getTrans) + (\e -> do + report $ CouldNotLoadTranslations (renderLang lang) + $ case e of + PandocCouldNotFindDataFileError _ -> + ("data file " ++ fallbackFile ++ " not found") + _ -> "" + -- make sure we don't try again... + modifyCommonState $ \st -> st{ stTranslations = Nothing } + return mempty)) + +-- | Get a translation from the current term map. +-- Issue a warning if the term is not defined. +translateTerm :: PandocMonad m => Term -> m String +translateTerm term = do + Translations termMap <- getTranslations + case M.lookup term termMap of + Just s -> return s + Nothing -> do + report $ NoTranslation (show term) + return "" + -- | Evaluate a 'PandocIO' operation. runIO :: PandocIO a -> IO (Either PandocError a) runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma @@ -317,7 +399,7 @@ liftIOError :: (String -> IO a) -> String -> PandocIO a liftIOError f u = do res <- liftIO $ tryIOError $ f u case res of - Left e -> throwError $ PandocIOError u e + Left e -> throwError $ PandocIOError u e Right r -> return r instance PandocMonad PandocIO where @@ -328,17 +410,15 @@ instance PandocMonad PandocIO where newUniqueHash = hashUnique <$> liftIO IO.newUnique openURL u = do report $ Fetching u - res <- liftIO (IO.openURL u) + res <- liftIOError Shared.openURL u case res of Right r -> return r Left e -> throwError $ PandocHttpError u e readFileLazy s = liftIOError BL.readFile s readFileStrict s = liftIOError B.readFile s - readDefaultDataFile fname = liftIOError IO.readDefaultDataFile fname - readDataFile fname = do - datadir <- getUserDataDir - liftIOError (IO.readDataFile datadir) fname - glob = liftIO . IO.glob + + glob = liftIOError IO.glob + fileExists = liftIOError Directory.doesFileExist getModificationTime fp = liftIOError IO.getModificationTime fp getCommonState = PandocIO $ lift get putCommonState x = PandocIO $ lift $ put x @@ -432,6 +512,109 @@ downloadOrRead sourceURL s = convertSlash '\\' = '/' convertSlash x = x +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.docx") + if exists + then return (Just (d </> "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.odt") + if exists + then return (Just (d </> "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Read file from user data directory or, +-- if not found there, from Cabal data directory. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- getUserDataDir + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir </> fname) + if exists + then readFileStrict (userDir </> fname) + else readDefaultDataFile fname + +-- | Read file from from Cabal data directory. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx +readDefaultDataFile "reference.odt" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError fname + Just contents -> return contents + where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + transformPathParts = reverse . foldl go [] + go as "." = as + go (_:as) ".." = as + go as x = x : as +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname + +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError fn +#endif + withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a withPaths [] _ fp = throwError $ PandocResourceNotFound fp withPaths (p:ps) action fp = @@ -491,9 +674,8 @@ writeMedia dir mediabag subpath = do Nothing -> throwError $ PandocResourceNotFound subpath Just (_, bs) -> do report $ Extracting fullpath - liftIO $ do - createDirectoryIfMissing True $ takeDirectory fullpath - BL.writeFile fullpath bs + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + liftIOError (\p -> BL.writeFile p bs) fullpath adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline adjustImagePath dir paths (Image attr lab (src, tit)) @@ -624,28 +806,17 @@ instance PandocMonad PandocPure where case infoFileContents <$> getFileInfo fp fps of Just bs -> return bs Nothing -> throwError $ PandocResourceNotFound fp - readDefaultDataFile "reference.docx" = - (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx - readDefaultDataFile "reference.odt" = - (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT - readDefaultDataFile fname = do - let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname - readFileStrict fname' - readDataFile fname = do - datadir <- getUserDataDir - case datadir of - Just userDir -> do - userDirFiles <- getsPureState stUserDataFiles - case infoFileContents <$> getFileInfo (userDir </> fname) - userDirFiles of - Just bs -> return bs - Nothing -> readDefaultDataFile fname - Nothing -> readDefaultDataFile fname glob s = do FileTree ftmap <- getsPureState stFiles return $ filter (match (compile s)) $ M.keys ftmap + fileExists fp = do + fps <- getsPureState stFiles + case getFileInfo fp fps of + Nothing -> return False + Just _ -> return True + getModificationTime fp = do fps <- getsPureState stFiles case infoFileMTime <$> getFileInfo fp fps of @@ -667,9 +838,8 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict - readDefaultDataFile = lift . readDefaultDataFile - readDataFile = lift . readDataFile glob = lift . glob + fileExists = lift . fileExists getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState @@ -696,9 +866,8 @@ instance PandocMonad m => PandocMonad (ReaderT r m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict - readDefaultDataFile = lift . readDefaultDataFile - readDataFile = lift . readDataFile glob = lift . glob + fileExists = lift . fileExists getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState @@ -713,9 +882,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict - readDefaultDataFile = lift . readDefaultDataFile - readDataFile = lift . readDataFile glob = lift . glob + fileExists = lift . fileExists getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState @@ -730,9 +898,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict - readDefaultDataFile = lift . readDefaultDataFile - readDataFile = lift . readDataFile glob = lift . glob + fileExists = lift . fileExists getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState @@ -747,9 +914,8 @@ instance PandocMonad m => PandocMonad (StateT st m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict - readDefaultDataFile = lift . readDefaultDataFile - readDataFile = lift . readDataFile glob = lift . glob + fileExists = lift . fileExists getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState |