aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-08-11 11:56:54 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2017-08-11 22:22:31 -0700
commit74212eb1b0e1757fc0ac3e5d45b0ee18bac491e5 (patch)
tree522b79da10f8838b0bd2665ddfa741eed0ecbeff /src/Text/Pandoc/Class.hs
parent7892dcd353ff0efd24ed753061b9be8e556b0177 (diff)
downloadpandoc-74212eb1b0e1757fc0ac3e5d45b0ee18bac491e5.tar.gz
Added support for translations (localization) (see #3559).
* readDataFile, readDefaultDataFile, getReferenceDocx, getReferenceODT have been removed from Shared and moved into Class. They are now defined in terms of PandocMonad primitives, rather than being primitve methods of the class. * toLang has been moved from BCP47 to Class. * NoTranslation and CouldNotLoudTranslations have been added to LogMessage. * New module, Text.Pandoc.Translations, exporting Term, Translations, readTranslations. * New functions in Class: translateTerm, setTranslations. Note that nothing is loaded from data files until translateTerm is used; setTranslation just sets the language to be used. * Added two translation data files in data/translations. * LaTeX reader: Support `\setmainlanguage` or `\setdefaultlanguage` (polyglossia) and `\figurename`.
Diffstat (limited to 'src/Text/Pandoc/Class.hs')
-rw-r--r--src/Text/Pandoc/Class.hs258
1 files changed, 212 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 4697177ed..a3dd9ad58 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
@@ -71,8 +72,15 @@ module Text.Pandoc.Class ( PandocMonad(..)
, runIO
, runIOorExplode
, runPure
+ , readDefaultDataFile
+ , readDataFile
, fillMediaBag
, extractMedia
+ , toLang
+ , setTranslations
+ , translateTerm
+ , Translations(..)
+ , Term(..)
) where
import Prelude hiding (readFile)
@@ -81,10 +89,9 @@ import qualified System.Random as IO (newStdGen)
import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
import Data.Unique (hashUnique)
import qualified Data.Unique as IO (newUnique)
-import qualified Text.Pandoc.Shared as IO ( readDataFile
- , readDefaultDataFile
- , openURL )
+import qualified Text.Pandoc.Shared as Shared
import qualified Text.Pandoc.UTF8 as UTF8
+import qualified System.Directory as Directory
import Text.Pandoc.Compat.Time (UTCTime)
import Text.Pandoc.Logging
import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
@@ -98,6 +105,7 @@ import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
, posixSecondsToUTCTime
, POSIXTime )
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
+import qualified System.FilePath.Posix as Posix
import Network.URI ( escapeURIString, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI,
parseURI, URI(..) )
@@ -111,7 +119,7 @@ import qualified System.Environment as IO (lookupEnv)
import System.FilePath.Glob (match, compile)
import System.Directory (createDirectoryIfMissing, getDirectoryContents,
doesDirectoryExist)
-import System.FilePath ((</>), (<.>), takeDirectory,
+import System.FilePath ((</>), (<.>), takeDirectory, splitDirectories,
takeExtension, dropExtension, isRelative, normalise)
import qualified System.FilePath.Glob as IO (glob)
import qualified System.Directory as IO (getModificationTime)
@@ -121,13 +129,21 @@ import Control.Monad.State.Strict
import Control.Monad.Except
import Control.Monad.Writer (WriterT)
import Control.Monad.RWS (RWST)
+import Codec.Archive.Zip
import Data.Word (Word8)
import Data.Default
import System.IO.Error
import System.IO (stderr)
import qualified Data.Map as M
import Text.Pandoc.Error
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Translations (Term(..), Translations(..), readTranslations)
import qualified Debug.Trace
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#else
+import Paths_pandoc (getDataFileName)
+#endif
-- | The PandocMonad typeclass contains all the potentially
-- IO-related functions used in pandoc's readers and writers.
@@ -155,15 +171,12 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m)
-- | Read the strict ByteString contents from a file path,
-- raising an error on failure.
readFileStrict :: FilePath -> m B.ByteString
- -- | Read file from from Cabal data directory.
- readDefaultDataFile :: FilePath -> m B.ByteString
- -- | Read file from user data directory or,
- -- if not found there, from Cabal data directory.
- readDataFile :: FilePath -> m B.ByteString
-- | Return a list of paths that match a glob, relative to
-- the working directory. See 'System.FilePath.Glob' for
-- the glob syntax.
glob :: String -> m [FilePath]
+ -- | Returns True if file exists.
+ fileExists :: FilePath -> m Bool
-- | Return the modification time of a file.
getModificationTime :: FilePath -> m UTCTime
-- | Get the value of the 'CommonState' used by all instances
@@ -272,6 +285,9 @@ data CommonState = CommonState { stLog :: [LogMessage]
-- ^ Directory to search for data files
, stMediaBag :: MediaBag
-- ^ Media parsed from binary containers
+ , stTranslations :: Maybe
+ (Lang, Maybe Translations)
+ -- ^ Translations for localization
, stInputFiles :: Maybe [FilePath]
-- ^ List of input files from command line
, stOutputFile :: Maybe FilePath
@@ -290,6 +306,7 @@ instance Default CommonState where
def = CommonState { stLog = []
, stUserDataDir = Nothing
, stMediaBag = mempty
+ , stTranslations = Nothing
, stInputFiles = Nothing
, stOutputFile = Nothing
, stResourcePath = ["."]
@@ -297,6 +314,71 @@ instance Default CommonState where
, stTrace = False
}
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+ case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+ modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+ mbtrans <- getsCommonState stTranslations
+ case mbtrans of
+ Nothing -> return mempty -- no language defined
+ Just (_, Just t) -> return t
+ Just (lang, Nothing) -> do -- read from file
+ let translationFile = "translations/" ++ renderLang lang ++ ".trans"
+ let fallbackFile = "translations/" ++ langLanguage lang ++ ".trans"
+ let getTrans bs =
+ case readTranslations (UTF8.toString bs) of
+ Left e -> do
+ report $ CouldNotLoadTranslations (renderLang lang) e
+ -- make sure we don't try again...
+ modifyCommonState $ \st ->
+ st{ stTranslations = Nothing }
+ return mempty
+ Right t -> do
+ modifyCommonState $ \st ->
+ st{ stTranslations = Just (lang, Just t) }
+ return t
+ catchError (readDataFile translationFile >>= getTrans)
+ (\_ ->
+ catchError (readDataFile fallbackFile >>= getTrans)
+ (\e -> do
+ report $ CouldNotLoadTranslations (renderLang lang)
+ $ case e of
+ PandocCouldNotFindDataFileError _ ->
+ ("data file " ++ fallbackFile ++ " not found")
+ _ -> ""
+ -- make sure we don't try again...
+ modifyCommonState $ \st -> st{ stTranslations = Nothing }
+ return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m String
+translateTerm term = do
+ Translations termMap <- getTranslations
+ case M.lookup term termMap of
+ Just s -> return s
+ Nothing -> do
+ report $ NoTranslation (show term)
+ return ""
+
-- | Evaluate a 'PandocIO' operation.
runIO :: PandocIO a -> IO (Either PandocError a)
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
@@ -317,7 +399,7 @@ liftIOError :: (String -> IO a) -> String -> PandocIO a
liftIOError f u = do
res <- liftIO $ tryIOError $ f u
case res of
- Left e -> throwError $ PandocIOError u e
+ Left e -> throwError $ PandocIOError u e
Right r -> return r
instance PandocMonad PandocIO where
@@ -328,17 +410,15 @@ instance PandocMonad PandocIO where
newUniqueHash = hashUnique <$> liftIO IO.newUnique
openURL u = do
report $ Fetching u
- res <- liftIO (IO.openURL u)
+ res <- liftIOError Shared.openURL u
case res of
Right r -> return r
Left e -> throwError $ PandocHttpError u e
readFileLazy s = liftIOError BL.readFile s
readFileStrict s = liftIOError B.readFile s
- readDefaultDataFile fname = liftIOError IO.readDefaultDataFile fname
- readDataFile fname = do
- datadir <- getUserDataDir
- liftIOError (IO.readDataFile datadir) fname
- glob = liftIO . IO.glob
+
+ glob = liftIOError IO.glob
+ fileExists = liftIOError Directory.doesFileExist
getModificationTime fp = liftIOError IO.getModificationTime fp
getCommonState = PandocIO $ lift get
putCommonState x = PandocIO $ lift $ put x
@@ -432,6 +512,109 @@ downloadOrRead sourceURL s =
convertSlash '\\' = '/'
convertSlash x = x
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+ let paths = ["[Content_Types].xml",
+ "_rels/.rels",
+ "docProps/app.xml",
+ "docProps/core.xml",
+ "word/document.xml",
+ "word/fontTable.xml",
+ "word/footnotes.xml",
+ "word/numbering.xml",
+ "word/settings.xml",
+ "word/webSettings.xml",
+ "word/styles.xml",
+ "word/_rels/document.xml.rels",
+ "word/_rels/footnotes.xml.rels",
+ "word/theme/theme1.xml"]
+ let toLazy = BL.fromChunks . (:[])
+ let pathToEntry path = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+ contents <- toLazy <$> readDataFile ("docx/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.docx")
+ if exists
+ then return (Just (d </> "reference.docx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+ let paths = ["mimetype",
+ "manifest.rdf",
+ "styles.xml",
+ "content.xml",
+ "meta.xml",
+ "settings.xml",
+ "Configurations2/accelerator/current.xml",
+ "Thumbnails/thumbnail.png",
+ "META-INF/manifest.xml"]
+ let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+ contents <- (BL.fromChunks . (:[])) `fmap`
+ readDataFile ("odt/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.odt")
+ if exists
+ then return (Just (d </> "reference.odt"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+ datadir <- getUserDataDir
+ case datadir of
+ Nothing -> readDefaultDataFile fname
+ Just userDir -> do
+ exists <- fileExists (userDir </> fname)
+ if exists
+ then readFileStrict (userDir </> fname)
+ else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.odt" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+ case lookup (makeCanonical fname) dataFiles of
+ Nothing -> throwError $ PandocCouldNotFindDataFileError fname
+ Just contents -> return contents
+ where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ transformPathParts = reverse . foldl go []
+ go as "." = as
+ go (_:as) ".." = as
+ go as x = x : as
+#else
+ getDataFileName fname' >>= checkExistence >>= readFileStrict
+ where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+ exists <- fileExists fn
+ if exists
+ then return fn
+ else throwError $ PandocCouldNotFindDataFileError fn
+#endif
+
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
withPaths [] _ fp = throwError $ PandocResourceNotFound fp
withPaths (p:ps) action fp =
@@ -491,9 +674,8 @@ writeMedia dir mediabag subpath = do
Nothing -> throwError $ PandocResourceNotFound subpath
Just (_, bs) -> do
report $ Extracting fullpath
- liftIO $ do
- createDirectoryIfMissing True $ takeDirectory fullpath
- BL.writeFile fullpath bs
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ liftIOError (\p -> BL.writeFile p bs) fullpath
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
adjustImagePath dir paths (Image attr lab (src, tit))
@@ -624,28 +806,17 @@ instance PandocMonad PandocPure where
case infoFileContents <$> getFileInfo fp fps of
Just bs -> return bs
Nothing -> throwError $ PandocResourceNotFound fp
- readDefaultDataFile "reference.docx" =
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
- readDefaultDataFile "reference.odt" =
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
- readDefaultDataFile fname = do
- let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
- readFileStrict fname'
- readDataFile fname = do
- datadir <- getUserDataDir
- case datadir of
- Just userDir -> do
- userDirFiles <- getsPureState stUserDataFiles
- case infoFileContents <$> getFileInfo (userDir </> fname)
- userDirFiles of
- Just bs -> return bs
- Nothing -> readDefaultDataFile fname
- Nothing -> readDefaultDataFile fname
glob s = do
FileTree ftmap <- getsPureState stFiles
return $ filter (match (compile s)) $ M.keys ftmap
+ fileExists fp = do
+ fps <- getsPureState stFiles
+ case getFileInfo fp fps of
+ Nothing -> return False
+ Just _ -> return True
+
getModificationTime fp = do
fps <- getsPureState stFiles
case infoFileMTime <$> getFileInfo fp fps of
@@ -667,9 +838,8 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where
openURL = lift . openURL
readFileLazy = lift . readFileLazy
readFileStrict = lift . readFileStrict
- readDefaultDataFile = lift . readDefaultDataFile
- readDataFile = lift . readDataFile
glob = lift . glob
+ fileExists = lift . fileExists
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
@@ -696,9 +866,8 @@ instance PandocMonad m => PandocMonad (ReaderT r m) where
openURL = lift . openURL
readFileLazy = lift . readFileLazy
readFileStrict = lift . readFileStrict
- readDefaultDataFile = lift . readDefaultDataFile
- readDataFile = lift . readDataFile
glob = lift . glob
+ fileExists = lift . fileExists
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
@@ -713,9 +882,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
openURL = lift . openURL
readFileLazy = lift . readFileLazy
readFileStrict = lift . readFileStrict
- readDefaultDataFile = lift . readDefaultDataFile
- readDataFile = lift . readDataFile
glob = lift . glob
+ fileExists = lift . fileExists
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
@@ -730,9 +898,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
openURL = lift . openURL
readFileLazy = lift . readFileLazy
readFileStrict = lift . readFileStrict
- readDefaultDataFile = lift . readDefaultDataFile
- readDataFile = lift . readDataFile
glob = lift . glob
+ fileExists = lift . fileExists
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
@@ -747,9 +914,8 @@ instance PandocMonad m => PandocMonad (StateT st m) where
openURL = lift . openURL
readFileLazy = lift . readFileLazy
readFileStrict = lift . readFileStrict
- readDefaultDataFile = lift . readDefaultDataFile
- readDataFile = lift . readDataFile
glob = lift . glob
+ fileExists = lift . fileExists
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState